diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..7cf221dc1 --- /dev/null +++ b/.clang-format @@ -0,0 +1,250 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None 
+BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Linux +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +QualifierAlignment: Leave +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: true +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +PackConstructorInitializers: NextLine +BasedOnStyle: '' +ConstructorInitializerAllOnOneLineOrOnePerLine: false +AllowAllConstructorInitializersOnNextLine: true +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<ext/.*\.h>' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 3 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: BeforeHash +IndentExternBlock: AfterExternBlock +IndentRequiresClause: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 
+ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Left +PPIndentWidth: -1 +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RequiresClausePosition: OwnLine +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false 
+SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: c++17 +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... + diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 000000000..be331deed --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,189 @@ +# This settings file for clang-tidy chooses which checks to run, the settings +# for those checks, etc. It uses as many of the default values as possible and +# runs all checks with some exclusions by default. +# +# The full list of clang-tidy 17 checks and documentation can be found +# [here](https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/clang-tidy/index.html) +# +# The "Checks" command should have 5 sections separated by a newline: +# 1. Turn on all checks by default. Done with "*" +# 2. Turn off the categories of checks we don't want +# 3. Turn off specific, individual checks we don't want +# 4. Turn on checks that we do want from the categories of checks that we +# didn't want +# 5. 
Turn off the checks that we do want but that aren't passing yet +--- +Checks: "*, + + -abseil-*, + -altera-*, + -android-*, + -boost-*, + -darwin-*, + -fuchsia-*, + -linuxkernel-*, + -llvmlibc-*, + -*objc*, + -*osx*, + -zircon-*, + + -bugprone-easily-swappable-parameters, + -modernize-use-trailing-return-type, + -readability-avoid-const-params-in-decls, + -readability-static-accessed-through-instance, + -misc-unused-parameters, + -hicpp-multiway-paths-covered, + -cert-err58-cpp, + -cert-dcl37-c, + -cert-dcl51-cpp, + -cppcoreguidelines-pro-bounds-constant-array-index, + -google-readability-braces-around-statements, + -hicpp-braces-around-statements, + + google-readability-avoid-underscore-in-googletest-name, + google-upgrade-googletest-case, + + -bugprone-empty-catch, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-narrowing-conversions, + -bugprone-switch-missing-default-case, + -cert-env33-c, + -cert-err33-c, + -cert-err34-c, + -cert-msc32-c, + -cert-msc51-cpp, + -clang-analyzer-core.CallAndMessage, + -clang-analyzer-core.NullDereference, + -clang-analyzer-core.UndefinedBinaryOperatorResult, + -clang-analyzer-core.uninitialized.ArraySubscript, + -clang-analyzer-core.uninitialized.UndefReturn, + -clang-analyzer-deadcode.DeadStores, + -clang-analyzer-optin.performance.Padding, + -clang-analyzer-security.insecureAPI.strcpy, + -clang-diagnostic-format, + -clang-diagnostic-logical-not-parentheses, + -clang-diagnostic-macro-redefined, + -clang-diagnostic-unknown-cuda-version, + -clang-diagnostic-unused-command-line-argument, + -clang-diagnostic-unused-result, + -concurrency-mt-unsafe, + -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-const-or-ref-data-members, + -cppcoreguidelines-avoid-do-while, + -cppcoreguidelines-avoid-magic-numbers, + -cppcoreguidelines-avoid-non-const-global-variables, + -cppcoreguidelines-explicit-virtual-functions, + -cppcoreguidelines-init-variables, + -cppcoreguidelines-macro-usage, + 
-cppcoreguidelines-narrowing-conversions, + -cppcoreguidelines-no-malloc, + -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-owning-memory, + -cppcoreguidelines-prefer-member-initializer, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-cstyle-cast, + -cppcoreguidelines-pro-type-member-init, + -cppcoreguidelines-pro-type-reinterpret-cast, + -cppcoreguidelines-pro-type-vararg, + -cppcoreguidelines-special-member-functions, + -cppcoreguidelines-use-default-member-init, + -cppcoreguidelines-virtual-class-destructor, + -google-explicit-constructor, + -google-global-names-in-headers, + -google-readability-casting, + -google-readability-namespace-comments, + -google-readability-todo, + -google-runtime-int, + -hicpp-avoid-c-arrays, + -hicpp-deprecated-headers, + -hicpp-explicit-conversions, + -hicpp-member-init, + -hicpp-no-array-decay, + -hicpp-no-malloc, + -hicpp-special-member-functions, + -hicpp-use-auto, + -hicpp-use-equals-default, + -hicpp-use-noexcept, + -hicpp-use-nullptr, + -hicpp-use-override, + -hicpp-vararg, + -llvm-else-after-return, + -llvm-header-guard, + -llvm-include-order, + -llvm-namespace-comment, + -misc-const-correctness, + -misc-header-include-cycle, + -misc-include-cleaner, + -misc-non-private-member-variables-in-classes, + -misc-use-anonymous-namespace, + -modernize-avoid-c-arrays, + -modernize-deprecated-headers, + -modernize-macro-to-enum, + -modernize-redundant-void-arg, + -modernize-type-traits, + -modernize-type-traits, + -modernize-use-auto, + -modernize-use-default-member-init, + -modernize-use-equals-default, + -modernize-use-nodiscard, + -modernize-use-noexcept, + -modernize-use-nullptr, + -modernize-use-override, + -modernize-use-using, + -openmp-use-default-none, + -performance-avoid-endl, + -performance-unnecessary-value-param, + -readability-container-size-empty, + -readability-convert-member-functions-to-static, + 
-readability-delete-null-pointer, + -readability-duplicate-include, + -readability-else-after-return, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-inconsistent-declaration-parameter-name, + -readability-isolate-declaration, + -readability-magic-numbers, + -readability-make-member-function-const, + -readability-non-const-parameter, + -readability-redundant-control-flow, + -readability-redundant-preprocessor, + -readability-suspicious-call-argument" +WarningsAsErrors: '' +HeaderFilterRegex: '.*' +FormatStyle: 'file' +UseColor: false +CheckOptions: + readability-braces-around-statements.ShortStatementLines: 1 + # readability-identifier-naming allowed casing types + # - lower_case + # - UPPER_CASE + # - camelBack + # - CamelCase + # - camel_Snake_Back + # - Camel_Snake_Case + # - aNy_CasE + + # readability-identifier-naming.VariableCase: 'lower_case' + # readability-identifier-naming.FunctionCase: 'Camel_Snake_Case' + readability-identifier-naming.NamespaceCase: 'lower_case' + # readability-identifier-naming.MacroDefinitionCase: 'UPPER_CASE' + # readability-identifier-naming.TypedefCase: 'CamelCase' + # readability-identifier-naming.TypeAliasCase: 'CamelCase' + readability-identifier-naming.EnumCase: 'CamelCase' + # readability-identifier-naming.ConstantCase: 'lower_case' + + # readability-identifier-naming.ConstantPrefix: 'k_' + # readability-identifier-naming.GlobalVariablePrefix: 'g_' + + readability-identifier-naming.ClassCase: 'CamelCase' + # readability-identifier-naming.MemberCase: 'lower_case' # This entry might not be needed + # readability-identifier-naming.MethodCase: 'CamelCase' # This entry might not be needed + # readability-identifier-naming.PrivateMemberSuffix: '_' + # readability-identifier-naming.PrivateMethodSuffix: '_' + + hicpp-signed-bitwise.IgnorePositiveIntegerLiterals: 'true' + + bugprone-reserved-identifier.AllowedIdentifiers: 
'__cudaSafeCall;__cudaCheckError;__shfl_down;__CHOLLA_PRETTY_FUNC__' +... diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 000000000..51e66225d --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,19 @@ +# To tell git blame to ignore these commits run this command in the repo. +# `git config blame.ignoreRevsFile .git-blame-ignore-revs` +# Requires git v2.23 or greater. Each entry must include the full 40 character +# hash + +# Strip all trailing whitespace +40fcc44334cc92572beb726961e23beb6be8ae2f + +# Source Files Reorganization +50ce61188d43f778e5a31a28b95bbc7312a5bbfb +b78d8c96680c9c2d5a5d41656895cb3795e1e204 + +# Reformat Code with clang-format +729ef8ed307eaa2cf42baa1f5af6c389ad614ac4 +fcaa4714241ad764d9ae38159cac5618e59178c8 + +# Reformat Code with clang-format increasing column width to 120 +b779b212b24ed19592ac309eab1c3ccb7ba66212 +8e5b4619734e0922d815f4d259323c68002af6db diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_and_lint.yml similarity index 61% rename from .github/workflows/build_tests.yml rename to .github/workflows/build_and_lint.yml index 19bdabb44..a4308ebd3 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_and_lint.yml @@ -1,4 +1,7 @@ -name: Cholla Compile +name: Build & Lint + +# This runs the HIP Builds. 
CUDA builds can be reenabled by adding the CUDA +# container to the matrix and uncommenting the CUDA lines on: pull_request: @@ -9,15 +12,15 @@ on: jobs: Build: name: > - Build + Build & Lint: ${{ matrix.container.name }} TYPE=${{ matrix.make-type }} # if: ${{ false }} # If uncommented this line will disable this job # Choose OS/Runner runs-on: ubuntu-latest - container: - image: ${{matrix.container.link}} + container: + image: ${{matrix.container.link}} defaults: run: shell: bash @@ -25,16 +28,13 @@ jobs: strategy: fail-fast: false matrix: - make-type: [hydro, gravity, disk, particles, cosmology, mhd] - container: [{name: "CUDA", link: "docker://alwinm/cholla:cuda_github"}, {name: "HIP",link: "docker://alwinm/cholla:hip_github"},] + make-type: [hydro, gravity, disk, particles, cosmology, mhd, dust, cooling] + # The CUDA container can be added with {name: "CUDA", link: "docker://chollahydro/cholla:cuda_github"} + container: [{name: "HIP",link: "docker://chollahydro/cholla:rocm_github"}] # Setup environment variables env: - CHOLLA_MACHINE: github CHOLLA_MAKE_TYPE: ${{ matrix.make-type }} - CUDA_ROOT: /usr/local/cuda - HDF5_ROOT: /usr/lib/x86_64-linux-gnu/hdf5/serial - MPI_ROOT: /usr/lib/x86_64-linux-gnu/openmpi # Run the job itself steps: @@ -53,22 +53,22 @@ jobs: git --version git config --global --add safe.directory /__w/cholla/cholla git config --global --add safe.directory '*' - - name: Show CUDA and gcc version - if: matrix.container.name == 'CUDA' - run: | - cc --version - c++ --version - nvcc -V + # - name: Show CUDA and gcc version + # if: matrix.container.name == 'CUDA' + # run: | + # cc --version + # c++ --version + # nvcc -V - name: Show HIP and hipcc version if: matrix.container.name == 'HIP' run: | hipcc --version hipconfig --full - # Perform Build - name: Cholla setup run: | + make clobber source builds/run_tests.sh setupTests -c gcc echo "CHOLLA_ROOT = ${CHOLLA_ROOT}" @@ -77,11 +77,6 @@ jobs: echo "CHOLLA_LAUNCH_COMMAND=${CHOLLA_LAUNCH_COMMAND}" >> 
$GITHUB_ENV echo "F_OFFLOAD=${F_OFFLOAD} >> $GITHUB_ENV echo "CHOLLA_ENVSET=${CHOLLA_ENVSET} >> $GITHUB_ENV - - name: Build GoogleTest - run: | - source builds/run_tests.sh - buildGoogleTest - echo "GOOGLETEST_ROOT=${GOOGLETEST_ROOT}" >> $GITHUB_ENV - name: Build Cholla run: | source builds/run_tests.sh @@ -90,3 +85,17 @@ jobs: run: | source builds/run_tests.sh buildChollaTests + + # Run Clang-tidy + # - name: Run clang-tidy + # if: matrix.container.name == 'CUDA' + # run: make tidy TYPE=${{ matrix.make-type }} CLANG_TIDY_ARGS="--warnings-as-errors=*" + # - name: Display tidy_results_cpp.log + # if: ${{ (matrix.container.name == 'CUDA') && (always()) }} + # run: cat tidy_results_cpp.log + # - name: Display tidy_results_c.log + # if: ${{ (matrix.container.name == 'CUDA') && (always()) }} + # run: cat tidy_results_c.log + # - name: Display tidy_results_gpu.log + # if: ${{ (matrix.container.name == 'CUDA') && (always()) }} + # run: cat tidy_results_gpu.log diff --git a/.github/workflows/code_formatting.yml b/.github/workflows/code_formatting.yml new file mode 100644 index 000000000..6176efac3 --- /dev/null +++ b/.github/workflows/code_formatting.yml @@ -0,0 +1,28 @@ +name: Code Formatting + +on: [pull_request, push] + +jobs: + cpp-format: + runs-on: ubuntu-latest + + # Setup environment variables + env: + CLANG_FORMAT_VERSION: 17 + + steps: + - uses: actions/checkout@v3 + - name: Install clang-format + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - + sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-${{ env.CLANG_FORMAT_VERSION }} main" + sudo apt install clang-format-${{ env.CLANG_FORMAT_VERSION }} + sudo ln --symbolic --force /usr/bin/clang-format-${{ env.CLANG_FORMAT_VERSION }} /usr/bin/clang-format + - name: Verify clang-format installation + run: | + clang-format-${{ env.CLANG_FORMAT_VERSION }} --version + which clang-format-${{ env.CLANG_FORMAT_VERSION }} + clang-format --version + which clang-format + 
- name: Check if files are properly formatted + run: tools/clang-format_runner.sh --dry-run --Werror \ No newline at end of file diff --git a/.gitignore b/.gitignore index 936f8ebbb..864a8ab2c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ # Files specific to this repo # ############################## -cholla* googletest* +tidy_results*.txt +bin/* # Compiled source # ################### @@ -15,6 +16,7 @@ googletest* *.a a.out *.dSYM +__pycache__ # Makefiles # ############# @@ -24,7 +26,7 @@ makefile.summit *.o ##executable -cholla* +cholla.* ## input files #parameter_file.txt @@ -38,10 +40,9 @@ data out.* o.* run - - disk.* - +*.gcno +*.gcda # Logs and databases # ###################### @@ -66,7 +67,6 @@ disk.* # OS generated files # ###################### .DS_Store - .remote-sync.json .remote-sync_macos.json ._* @@ -84,4 +84,3 @@ Thumbs.db ############################# docs/doxygen/build docs/sphinx/build - diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 000000000..8928df96c --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,121 @@ +pipeline +{ + agent none + + environment + { + CHOLLA_ROOT = "${env.WORKSPACE}" + CHOLLA_MACHINE = 'crc' + CHOLLA_LAUNCH_COMMAND = 'mpirun -np' + } + + stages + { + stage('BuildAndTest') + { + matrix + { + agent + { + label + { + label 'eschneider-ppc-n4' + customWorkspace "${env.JOB_NAME}/${env.CHOLLA_MAKE_TYPE}" + } + } + + axes + { + axis + { + name 'CHOLLA_MAKE_TYPE' + values 'hydro', 'gravity', 'disk', 'particles', 'cosmology', 'mhd', 'dust', 'cooling' + } + } + + stages + { + stage('Clone Repo Cholla') + { + steps + { + sh ''' + git submodule update --init --recursive + make clobber + ''' + } + } + stage('Build Cholla') + { + steps + { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + buildCholla OPTIMIZE + ''' + } + } + stage('Build Tests') + { + steps + { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + buildChollaTests + ''' + } + } + 
stage('Run Tests') + { + steps + { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + runTests + ''' + } + } + stage('Run Clang Tidy') + { + steps + { + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + sh ''' + source builds/run_tests.sh + setupTests -c gcc -t ${CHOLLA_MAKE_TYPE} + + module load clang/17.0.1 + make tidy CLANG_TIDY_ARGS="--warnings-as-errors=*" TYPE=${CHOLLA_MAKE_TYPE} + ''' + } + } + } + stage('Show Tidy Results') + { + steps + { + // Print the clang-tidy results with bars of equal + // signs separating each file + sh ''' + printf '=%.0s' {1..100} + printf "\n" + cat tidy_results_cpp_${CHOLLA_MAKE_TYPE}.log + printf '=%.0s' {1..100} + printf "\n" + cat tidy_results_gpu_${CHOLLA_MAKE_TYPE}.log + printf '=%.0s' {1..100} + printf "\n" + ''' + } + } + } + } + } + } +} diff --git a/Makefile b/Makefile index b4975b1ea..c444ae4a8 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ +SHELL = /usr/bin/env bash #-- Set default include makefile MACHINE ?= $(shell builds/machine.sh) TYPE ?= hydro @@ -10,67 +11,78 @@ CUDA_ARCH ?= sm_70 DIRS := src src/analysis src/chemistry_gpu src/cooling src/cooling_grackle src/cosmology \ src/cpu src/global src/gravity src/gravity/paris src/grid src/hydro \ - src/integrators src/io src/main.cpp src/main_tests.cpp \ + src/integrators src/io src/main.cpp src/main_tests.cpp src/mhd\ src/model src/mpi src/old_cholla src/particles src/reconstruction \ - src/riemann_solvers src/system_tests src/utils + src/riemann_solvers src/system_tests src/utils src/dust SUFFIX ?= .$(TYPE).$(MACHINE) -CFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.c)) CPPFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cpp)) GPUFILES := $(foreach DIR,$(DIRS),$(wildcard $(DIR)/*.cu)) # Build a list of all potential object files so cleaning works properly -CLEAN_OBJS := $(subst .c,.o,$(CFILES)) \ - $(subst .cpp,.o,$(CPPFILES)) \ +CLEAN_OBJS := $(subst .cpp,.o,$(CPPFILES)) \ $(subst .cu,.o,$(GPUFILES)) -# Set 
testing related lists and variables +# Check if it should include testing flags ifeq ($(TEST), true) - # This is a test build so lets clear out Cholla's main file and set - # appropriate compiler flags, suffix, etc + ADD_TEST_FLAGS = yes $(info Building Tests...) $(info ) - SUFFIX := $(strip $(SUFFIX)).tests CPPFILES := $(filter-out src/main.cpp,$(CPPFILES)) - LIBS += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp - TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include - CFLAGS = $(TEST_FLAGS) - CXXFLAGS = $(TEST_FLAGS) - GPUFLAGS = $(TEST_FLAGS) - + # HACK # Set the build flags to debug. This is mostly to avoid the approximations # made by Ofast which break std::isnan and std::isinf which are required for # the testing BUILD = DEBUG +endif +ifeq ($(MAKECMDGOALS), tidy) + ADD_TEST_FLAGS = yes +endif + +# Set testing related lists and variables +ifeq ($(ADD_TEST_FLAGS), yes) + # This is a test build so lets clear out Cholla's main file and set + # appropriate compiler flags, suffix, etc + SUFFIX := $(strip $(SUFFIX)).tests + LIBS += -L$(GOOGLETEST_ROOT)/lib64 -pthread -lgtest -lhdf5_cpp + TEST_FLAGS = -I$(GOOGLETEST_ROOT)/include + CXXFLAGS += $(TEST_FLAGS) + GPUFLAGS += $(TEST_FLAGS) else # This isn't a test build so clear out testing related files - CFILES := $(filter-out src/system_tests/% %_tests.c,$(CFILES)) CPPFILES := $(filter-out src/system_tests/% %_tests.cpp,$(CPPFILES)) CPPFILES := $(filter-out src/utils/testing_utilities.cpp,$(CPPFILES)) GPUFILES := $(filter-out src/system_tests/% %_tests.cu,$(GPUFILES)) endif -OBJS := $(subst .c,.o,$(CFILES)) \ - $(subst .cpp,.o,$(CPPFILES)) \ +ifeq ($(COVERAGE), true) + CXXFLAGS += --coverage +endif + +OBJS := $(subst .cpp,.o,$(CPPFILES)) \ $(subst .cu,.o,$(GPUFILES)) #-- Set default compilers and flags -CC ?= cc CXX ?= CC -CFLAGS_OPTIMIZE ?= -g -Ofast -CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++14 -GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++14 +CXXFLAGS_OPTIMIZE ?= -g -Ofast -std=c++17 +GPUFLAGS_OPTIMIZE ?= -g -O3 -std=c++17 + 
+CXXFLAGS_DEBUG ?= -g -O0 -std=c++17 +ifdef HIPCONFIG + GPUFLAGS_DEBUG ?= -g -O0 -std=c++17 +else + GPUFLAGS_DEBUG ?= -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx +endif + BUILD ?= OPTIMIZE -CFLAGS += $(CFLAGS_$(BUILD)) CXXFLAGS += $(CXXFLAGS_$(BUILD)) GPUFLAGS += $(GPUFLAGS_$(BUILD)) #-- Add flags and libraries as needed -CFLAGS += $(DFLAGS) -Isrc CXXFLAGS += $(DFLAGS) -Isrc GPUFLAGS += $(DFLAGS) -Isrc @@ -89,6 +101,13 @@ ifeq ($(findstring -DPARIS,$(DFLAGS)),-DPARIS) endif endif +ifeq ($(findstring -DSUPERNOVA,$(DFLAGS)),-DSUPERNOVA) + ifdef HIPCONFIG + CXXFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include + GPUFLAGS += -I$(ROCM_PATH)/include/hiprand -I$(ROCM_PATH)/hiprand/include + endif +endif + ifeq ($(findstring -DHDF5,$(DFLAGS)),-DHDF5) CXXFLAGS += -I$(HDF5_ROOT)/include GPUFLAGS += -I$(HDF5_ROOT)/include @@ -117,6 +136,7 @@ ifdef HIPCONFIG DFLAGS += -DO_HIP CXXFLAGS += $(HIPCONFIG) GPUCXX ?= hipcc + #GPUFLAGS += -Wall LD := $(CXX) LDFLAGS := $(CXXFLAGS) -L$(ROCM_PATH)/lib LIBS += -lamdhip64 @@ -144,7 +164,7 @@ ifeq ($(findstring -DCHEMISTRY_GPU,$(DFLAGS)),-DCHEMISTRY_GPU) DFLAGS += -DSCALAR endif -.SUFFIXES: .c .cpp .cu .o +.SUFFIXES: .cpp .cu .o EXEC := bin/cholla$(SUFFIX) @@ -154,28 +174,56 @@ DFLAGS += -DGIT_HASH='"$(shell git rev-parse --verify HEAD)"' MACRO_FLAGS := -DMACRO_FLAGS='"$(DFLAGS)"' DFLAGS += $(MACRO_FLAGS) +# Setup variables for clang-tidy +LIBS_CLANG_TIDY := $(subst -I/, -isystem /,$(LIBS)) +# This tells clang-tidy that the path after each -isystem command is a system library so that it can be easily ignored by the header filter regex +LIBS_CLANG_TIDY += -isystem $(MPI_ROOT)/include -isystem $(HDF5_ROOT)/include +CXXFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(LDFLAGS)) +GPUFLAGS_CLANG_TIDY := $(subst -I/, -isystem /,$(GPUFLAGS)) +GPUFLAGS_CLANG_TIDY := $(filter-out -ccbin=mpicxx -fmad=false --expt-extended-lambda,$(GPUFLAGS_CLANG_TIDY)) +GPUFLAGS_CLANG_TIDY += --cuda-host-only --cuda-path=$(CUDA_ROOT) -isystem 
/clang/includes +CPPFILES_TIDY := $(CPPFILES) +GPUFILES_TIDY := $(GPUFILES) + +ifdef TIDY_FILES + CPPFILES_TIDY := $(filter $(TIDY_FILES), $(CPPFILES_TIDY)) + GPUFILES_TIDY := $(filter $(TIDY_FILES), $(GPUFILES_TIDY)) +endif + $(EXEC): prereq-build $(OBJS) mkdir -p bin/ && $(LD) $(LDFLAGS) $(OBJS) -o $(EXEC) $(LIBS) eval $(EXTRA_COMMANDS) -%.o: %.c - $(CC) $(CFLAGS) -c $< -o $@ - %.o: %.cpp $(CXX) $(CXXFLAGS) -c $< -o $@ %.o: %.cu $(GPUCXX) $(GPUFLAGS) -c $< -o $@ -.PHONY: clean +.PHONY: clean clobber tidy format + +format: + tools/clang-format_runner.sh + +tidy: +# Flags we might want +# - --warnings-as-errors=<string> Upgrade all warnings to error, good for CI + clang-tidy --verify-config + @echo -e + (time clang-tidy $(CLANG_TIDY_ARGS) $(CPPFILES_TIDY) -- $(DFLAGS) $(CXXFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_cpp_$(TYPE).log 2>&1 & \ + (time clang-tidy $(CLANG_TIDY_ARGS) $(GPUFILES_TIDY) -- $(DFLAGS) $(GPUFLAGS_CLANG_TIDY) $(LIBS_CLANG_TIDY)) > tidy_results_gpu_$(TYPE).log 2>&1 & \ + for i in 1 2; do wait -n; done + @echo -e "\nResults from clang-tidy are available in the 'tidy_results_cpp_$(TYPE).log' and 'tidy_results_gpu_$(TYPE).log' files." clean: rm -f $(CLEAN_OBJS) rm -rf googletest -find bin/ -type f -executable -name "cholla.*.$(MACHINE)*" -exec rm -f '{}' \; + -find src/ -type f -name "*.gcno" -delete + -find src/ -type f -name "*.gcda" -delete clobber: clean - find . 
-type f -executable -name "cholla*" -exec rm -f '{}' \; + -find bin/ -type f -executable -name "cholla*" -exec rm -f '{}' \; -find bin/ -type d -name "t*" -prune -exec rm -rf '{}' \; rm -rf bin/cholla.*tests*.xml diff --git a/README.md b/README.md index c9d731b89..a61ce0ad6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -![Compile](https://github.com/cholla-hydro/cholla/actions/workflows/build_tests.yml/badge.svg) +![Build & Lint](https://github.com/cholla-hydro/cholla/actions/workflows/build_and_lint.yml/badge.svg) +![Code Formatting](https://github.com/cholla-hydro/cholla/actions/workflows/code_formatting.yml/badge.svg) CHOLLA ============ diff --git a/builds/make.host.c3po b/builds/make.host.c3po index 588a14861..dc5e3d8eb 100644 --- a/builds/make.host.c3po +++ b/builds/make.host.c3po @@ -1,12 +1,9 @@ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++14 -ccbin=mpicxx -GPUFLAGS_OPTIMIZE = -g -O3 -std=c++14 -ccbin=mpicxx +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 ${F_OFFLOAD} +GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 -ccbin=mpicxx -Xcompiler -rdynamic +GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 diff --git a/builds/make.host.crc b/builds/make.host.crc index 6378cc80e..e0c20e162 100644 --- a/builds/make.host.crc +++ b/builds/make.host.crc @@ -1,11 +1,9 @@ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 +GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 CUDA_ARCH = sm_70 OMP_NUM_THREADS = 16 diff --git a/builds/make.host.frontier b/builds/make.host.frontier 
index 14aae5d38..bae874c78 100644 --- a/builds/make.host.frontier +++ b/builds/make.host.frontier @@ -1,21 +1,18 @@ #-- make.host for Frontier at the OLCF with #-- Compiler and flags for different build type -CC = cc CXX = CC #GPUCXX ?= CC -x hip GPUCXX ?= hipcc -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 -Wno-unused-result -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14 -Wno-unused-result - -GPUFLAGS = --offload-arch=gfx90a -Wno-unused-result +GPUFLAGS_OPTIMIZE = -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result +GPUFLAGS_DEBUG = -g -O0 -std=c++17 --offload-arch=gfx90a -Wall -Wno-unused-result HIPCONFIG = -I$(ROCM_PATH)/include $(shell hipconfig -C) # workaround for Rocm 5.2 warnings #HIPCONFIG = $(shell hipconfig -C) -OMP_NUM_THREADS = 8 +OMP_NUM_THREADS = 7 #-- How to launch job JOB_LAUNCH = srun -u -A STF016 -n 1 -c 8 diff --git a/builds/make.host.github b/builds/make.host.github index acc003aad..46da09349 100644 --- a/builds/make.host.github +++ b/builds/make.host.github @@ -1,18 +1,16 @@ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -std=c++14 -GPUFLAGS_OPTIMIZE = -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} +GPUFLAGS_DEBUG = -g -G -cudart shared -O0 -std=c++17 +GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 OMP_NUM_THREADS = 7 #-- Library ifdef HIPCONFIG HIPCONFIG := -I$(shell hipconfig -R)/include $(shell hipconfig -C) + GPUFLAGS_DEBUG = -g -O0 -std=c++17 endif CUDA_ROOT := $(CUDA_ROOT) HDF5_ROOT := $(HDF5_ROOT) @@ -24,15 +22,8 @@ GOOGLETEST_ROOT := ${GOOGLETEST_ROOT} #-- MPI calls accept GPU buffers (requires GPU-aware MPI) # MPI_GPU = -DMPI_GPU -ifndef HIPCONFIG - GPUFLAGS_DEBUG += -g -O0 
-ccbin=mpicxx - GPUFLAGS_OPTIMIZE += -g -O3 -ccbin=mpicxx -endif - ifdef HIPCONFIG MPI_ROOT := ${MPI_ROOT} - CFLAGS_DEBUG += -fPIE - CFLAGS_OPTIMIZE += -fPIE CXXFLAGS_DEBUG += -fPIE CXXFLAGS_OPTIMIZE += -fPIE GPUFLAGS_DEBUG += -fPIE diff --git a/builds/make.host.lux b/builds/make.host.lux index b8af559c1..edf4e42c0 100644 --- a/builds/make.host.lux +++ b/builds/make.host.lux @@ -1,14 +1,10 @@ #-- make.inc for the Shamrock Server #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 -GPUFLAGS = -std=c++14 - +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 +GPUFLAGS = -std=c++17 OMP_NUM_THREADS = 10 diff --git a/builds/make.host.poplar b/builds/make.host.poplar index 726ec788c..f029e09e6 100644 --- a/builds/make.host.poplar +++ b/builds/make.host.poplar @@ -1,12 +1,10 @@ #-- make.inc for Poplar, COE cluster at HPE #-- Compiler and flags for different build type -CC = cc CXX = CC -CFLAGS_DEBUG = -g -O0 ${F_OFFLOAD} -CFLAGS_OPTIMIZE = -Ofast ${F_OFFLOAD} -CXXFLAGS_DEBUG = -g -O0 -std=c++14 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD} +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} + GPUFLAGS = --offload-arch=gfx906,gfx908 HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.host.poplar.aomp b/builds/make.host.poplar.aomp index 984e432c2..e87fe68e2 100644 --- a/builds/make.host.poplar.aomp +++ b/builds/make.host.poplar.aomp @@ -1,13 +1,11 @@ #-- make.inc for Poplar, COE cluster at HPE #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx HIPCONFIG = $(shell hipconfig -C) -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 + GPUFLAGS = --offload-arch=gfx906,gfx908 
LIBS = -lm -lstdc++ diff --git a/builds/make.host.poplar.cce+hip b/builds/make.host.poplar.cce+hip index 49b2e6256..b83268e12 100644 --- a/builds/make.host.poplar.cce+hip +++ b/builds/make.host.poplar.cce+hip @@ -1,13 +1,11 @@ #-- make.inc for Poplar, HPE COE cluster #-- Compiler and flags for different build type -CC = cc CXX = CC HIPCONFIG = $(shell hipconfig -C) -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -Ofast -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 + GPUFLAGS = --offload-arch=gfx906,gfx908 #-- How to launch job diff --git a/builds/make.host.shamrock b/builds/make.host.shamrock index fb6c63b94..eec8d48e6 100644 --- a/builds/make.host.shamrock +++ b/builds/make.host.shamrock @@ -1,14 +1,11 @@ #-- make.inc for the Lux Cluster #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx #CC = gcc #CXX = g++ -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 OMP_NUM_THREADS = 10 diff --git a/builds/make.host.spock b/builds/make.host.spock index a4b031788..9dfc41676 100644 --- a/builds/make.host.spock +++ b/builds/make.host.spock @@ -1,14 +1,10 @@ #-- make.inc for Spock EAS at the OLCF with #-- Compiler and flags for different build type -CC = cc CXX = CC -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 - -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -g -Ofast -std=c++17 GPUFLAGS = --offload-arch=gfx908 HIPCONFIG = $(shell hipconfig -C) diff --git a/builds/make.host.summit b/builds/make.host.summit index 2d557be26..a9f5337f5 100644 --- a/builds/make.host.summit +++ b/builds/make.host.summit @@ -2,14 +2,11 @@ # https://www.olcf.ornl.gov/summit/ #-- Compiler and flags for different build type -CC = mpicc CXX = mpicxx 
-CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 ${F_OFFLOAD} -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 ${F_OFFLOAD} -GPUFLAGS_DEBUG = -g -O0 -std=c++14 -ccbin=mpicxx -GPUFLAGS_OPTIMIZE = -g -O3 -std=c++14 -ccbin=mpicxx +CXXFLAGS_DEBUG = -g -O0 -std=c++17 ${F_OFFLOAD} +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 ${F_OFFLOAD} +GPUFLAGS_DEBUG = -g -O0 -std=c++17 -ccbin=mpicxx -G -cudart shared +GPUFLAGS_OPTIMIZE = -g -O3 -std=c++17 -ccbin=mpicxx OMP_NUM_THREADS = 7 diff --git a/builds/make.host.tornado b/builds/make.host.tornado index df938d8e7..cdcf5483f 100644 --- a/builds/make.host.tornado +++ b/builds/make.host.tornado @@ -1,12 +1,9 @@ #-- make.inc for the Lux Cluster #-- Compiler and flags for different build type -CC = gcc CXX = g++ -CFLAGS_DEBUG = -g -O0 -CFLAGS_OPTIMIZE = -g -O2 -CXXFLAGS_DEBUG = -g -O0 -std=c++14 -CXXFLAGS_OPTIMIZE = -Ofast -std=c++14 +CXXFLAGS_DEBUG = -g -O0 -std=c++17 +CXXFLAGS_OPTIMIZE = -Ofast -std=c++17 OMP_NUM_THREADS = 10 diff --git a/builds/make.inc.template b/builds/make.inc.template index 22fbd663d..abfa97d1e 100644 --- a/builds/make.inc.template +++ b/builds/make.inc.template @@ -1,13 +1,8 @@ #POISSON_SOLVER ?= -DPFFT #DFLAGS += $(POISSON_SOLVER) - -#To use GPUs, CUDA must be turned on here -#Optional error checking can also be enabled -DFLAGS += -DCUDA #-DCUDA_ERROR_CHECK - #To use MPI, DFLAGS must include -DMPI_CHOLLA -DFLAGS += -DMPI_CHOLLA -DBLOCK +DFLAGS += -DMPI_CHOLLA #Set the MPI Processes grid [nproc_x, nproc_y, nproc_z] #DFLAGS += -DSET_MPI_GRID @@ -33,8 +28,8 @@ DFLAGS += -DHDF5 # Reconstruction #DFLAGS += -DPCM #DFLAGS += -DPLMP -#DFLAGS += -DPLMC -DFLAGS += -DPPMP +DFLAGS += -DPLMC +#DFLAGS += -DPPMP #DFLAGS += -DPPMC # Riemann Solver @@ -65,7 +60,7 @@ DFLAGS += -DTEMPERATURE_FLOOR #DFLAGS += -DDYNAMIC_GPU_ALLOC # Set the cooling function -#DFLAGS += -DCOOLING_GPU +#DFLAGS += -DCOOLING_GPU #DFLAGS += -DCLOUDY_COOL # Use Tiled Iitial Conditions for Scaling Tets diff --git 
a/builds/make.type.basic_scalar b/builds/make.type.basic_scalar new file mode 100644 index 000000000..02706b223 --- /dev/null +++ b/builds/make.type.basic_scalar @@ -0,0 +1,33 @@ +#-- Default hydro build with BASIC_SCALAR + +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPLMC +DFLAGS += -DHLLC + +# Integrator +# DFLAGS += -DSIMPLE +DFLAGS += -DVL + +# Apply a density and temperature floor +DFLAGS += -DDENSITY_FLOOR +DFLAGS += -DTEMPERATURE_FLOOR + +# Toggle scalar fields in general +DFLAGS += -DSCALAR +# Toggle Basic scalar field +DFLAGS += -DBASIC_SCALAR + +# Solve the Gas Internal Energy using a Dual Energy Formalism +#DFLAGS += -DDE + +# Apply cooling on the GPU from precomputed tables +#DFLAGS += -DCOOLING_GPU + +# Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +# Select output format +# Can also add -DSLICES and -DPROJECTION +OUTPUT ?= -DOUTPUT -DHDF5 +DFLAGS += $(OUTPUT) diff --git a/builds/make.type.cloudy b/builds/make.type.cloudy index e604ff818..10fa51d60 100644 --- a/builds/make.type.cloudy +++ b/builds/make.type.cloudy @@ -6,9 +6,7 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC diff --git a/builds/make.type.cooling b/builds/make.type.cooling index baf4ed0e9..0b96722a0 100644 --- a/builds/make.type.cooling +++ b/builds/make.type.cooling @@ -6,9 +6,7 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK DFLAGS += -DPRECISION=2 DFLAGS += -DPPMP DFLAGS += -DHLLC diff --git a/builds/make.type.disk b/builds/make.type.disk index a142a4756..47bb22829 100644 --- a/builds/make.type.disk +++ b/builds/make.type.disk @@ -1,37 +1,47 @@ -MPI_GPU = +MPI_GPU = -DMPI_GPU DFLAGS += -DPARTICLES -DFLAGS += -DPARTICLES_CPU -DFLAGS += -DONLY_PARTICLES +#DFLAGS += -DPARTICLES_CPU +DFLAGS += -DPARTICLES_GPU +#DFLAGS += -DONLY_PARTICLES DFLAGS += -DPARTICLE_IDS -DFLAGS +=
-DSINGLE_PARTICLE_MASS +#DFLAGS += -DSINGLE_PARTICLE_MASS +DFLAGS += -DPARTICLE_AGE +DFLAGS += -DSUPERNOVA #this flag requires PARTICLE_AGE, PARTICLE_IDS +DFLAGS += -DANALYSIS +#DFLAGS += -DPARTICLES_KDK DFLAGS += -DGRAVITY +DFLAGS += -DGRAVITY_GPU # Use both -DSOR and -DPARIS_GALACTIC to run analytic test and compare solutions -DFLAGS += -DSOR +#DFLAGS += -DSOR DFLAGS += -DPARIS_GALACTIC DFLAGS += -DGRAVITY_ANALYTIC_COMP +DFLAGS += -DGRAVITY_5_POINTS_GRADIENT +#DFLAGS += -DSTATIC_GRAV -DFLAGS += -DCUDA -DFLAGS += -DMPI_CHOLLA -DFLAGS += -DBLOCK +DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +DFLAGS += -DPLMC DFLAGS += -DHLLC DFLAGS += -DVL -#DFLAGS += -DDISK_ICS +DFLAGS += -DDISK_ICS DFLAGS += -DDENSITY_FLOOR DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DCOOLING_GPU +#DFLAGS += -DCLOUDY_COOL DFLAGS += -DDE DFLAGS += -DCPU_TIME +DFLAGS += -DAVERAGE_SLOW_CELLS +DFLAGS += -DHYDRO_GPU -OUTPUT ?= -DOUTPUT -DHDF5 +OUTPUT ?= -DOUTPUT -DHDF5 -DSLICES -DPROJECTION DFLAGS += $(OUTPUT) -DFLAGS += $(MPI_GPU) +DFLAGS += $(MPI_GPU) DFLAGS += -DPARALLEL_OMP DFLAGS += -DN_OMP_THREADS=$(OMP_NUM_THREADS) diff --git a/builds/make.type.dust b/builds/make.type.dust new file mode 100644 index 000000000..1669a4077 --- /dev/null +++ b/builds/make.type.dust @@ -0,0 +1,46 @@ +#-- Default hydro + dust + +#-- separated output flag so that it can be overridden in target-specific +# for make check +OUTPUT ?= -DOUTPUT -DHDF5 + +MPI_GPU ?= + +DFLAGS += -DMPI_CHOLLA +DFLAGS += -DPRECISION=2 +DFLAGS += -DPLMC +DFLAGS += -DHLLC + +DFLAGS += -DDE +DFLAGS += -DAVERAGE_SLOW_CELLS +DFLAGS += -DTEMPERATURE_FLOOR +DFLAGS += -DDENSITY_FLOOR + +DFLAGS += -DVL + +# Evolve additional scalars +DFLAGS += -DSCALAR +DFLAGS += -DSCALAR_FLOOR + +# Define dust macro +DFLAGS += -DDUST + +# Apply the cooling in the GPU from precomputed tables +DFLAGS += -DCOOLING_GPU +DFLAGS += -DCLOUDY_COOL + +#Measure the Timing of the different stages +#DFLAGS += -DCPU_TIME + +DFLAGS += -DSLICES +DFLAGS +=
-DPROJECTION + +DFLAGS += $(OUTPUT) + +DFLAGS += -DOUTPUT_ALWAYS + +#Select if the Hydro Conserved data will reside in the GPU +#and the MPI transfers are done from the GPU +#If not specified, MPI_GPU is off by default +#This is set in the system make.host file +DFLAGS += $(MPI_GPU) \ No newline at end of file diff --git a/builds/make.type.hydro b/builds/make.type.hydro index 5824e6deb..9e9b1d77c 100644 --- a/builds/make.type.hydro +++ b/builds/make.type.hydro @@ -1,14 +1,16 @@ #-- Default hydro only build -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator +ifeq ($(findstring cosmology,$(TYPE)),cosmology) DFLAGS += -DSIMPLE -#DFLAGS += -DVL +else +DFLAGS += -DVL +endif # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR @@ -27,4 +29,3 @@ DFLAGS += -DTEMPERATURE_FLOOR # Can also add -DSLICES and -DPROJECTIONS OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) - diff --git a/builds/make.type.mhd b/builds/make.type.mhd index 3f67ea88f..6348c173e 100644 --- a/builds/make.type.mhd +++ b/builds/make.type.mhd @@ -6,18 +6,14 @@ OUTPUT ?= -DOUTPUT -DHDF5 MPI_GPU ?= -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMP +DFLAGS += -DPLMC DFLAGS += -DHLLD DFLAGS += -DMHD -ifeq ($(findstring cosmology,$(TYPE)),cosmology) -DFLAGS += -DSIMPLE -else +# MHD only supports the Van Leer integrator DFLAGS += -DVL -endif # need this if using Disk_3D # DFLAGS += -DDISK_ICS @@ -35,7 +31,7 @@ DFLAGS += -DTEMPERATURE_FLOOR # Apply the cooling in the GPU from precomputed tables # DFLAGS += -DCOOLING_GPU -#Measure the Timing of the different stages +# Measure the Timing of the different stages DFLAGS += -DCPU_TIME DFLAGS += $(OUTPUT) @@ -45,3 +41,12 @@ DFLAGS += $(OUTPUT) #If not specified, MPI_GPU is off by default #This is set in the system make.host file DFLAGS += $(MPI_GPU) + +# Disable CUDA error checking +# DFLAGS += -DDISABLE_GPU_ERROR_CHECKING + +# NOTE: The following 
macros are to help facilitate debugging and should not be +# used on scientific runs + +# Limit the number of steps to evolve. +# DFLAGS += -DN_STEPS_LIMIT=1000 diff --git a/builds/make.type.rot_proj b/builds/make.type.rot_proj index e6faa7514..76eea26d8 100644 --- a/builds/make.type.rot_proj +++ b/builds/make.type.rot_proj @@ -1,14 +1,13 @@ #-- Default hydro only build with rotated projection -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR @@ -28,4 +27,4 @@ DFLAGS += -DTEMPERATURE_FLOOR OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) -DFLAGS += -DROTATED_PROJECTION \ No newline at end of file +DFLAGS += -DROTATED_PROJECTION diff --git a/builds/make.type.static_grav b/builds/make.type.static_grav index ffa15c4ee..2c17f7e8b 100644 --- a/builds/make.type.static_grav +++ b/builds/make.type.static_grav @@ -1,14 +1,13 @@ #-- Default hydro only build with static_grav -DFLAGS += -DCUDA DFLAGS += -DMPI_CHOLLA DFLAGS += -DPRECISION=2 -DFLAGS += -DPPMC +DFLAGS += -DPLMC DFLAGS += -DHLLC # Integrator -DFLAGS += -DSIMPLE -#DFLAGS += -DVL +# DFLAGS += -DSIMPLE +DFLAGS += -DVL # Apply a density and temperature floor DFLAGS += -DDENSITY_FLOOR @@ -29,4 +28,3 @@ DFLAGS += -DSTATIC_GRAV # Can also add -DSLICES and -DPROJECTIONS OUTPUT ?= -DOUTPUT -DHDF5 DFLAGS += $(OUTPUT) - diff --git a/builds/run_tests.sh b/builds/run_tests.sh index bca41e411..0fc1ed629 100755 --- a/builds/run_tests.sh +++ b/builds/run_tests.sh @@ -54,6 +54,8 @@ setupTests () return 1 fi + builtin cd $CHOLLA_ROOT + # Determine the hostname then use that to pick the right machine name and launch # command if [[ -n ${CHOLLA_MACHINE+x} ]]; then @@ -94,10 +96,6 @@ setupTests () ;; esac - # Clean the cholla directory - builtin cd $CHOLLA_ROOT - make clobber - # Source the setup file source
"${CHOLLA_ROOT}/builds/setup.${CHOLLA_MACHINE}${CHOLLA_COMPILER}.sh" } @@ -110,7 +108,7 @@ buildCholla () { echo -e "\nBuilding Cholla...\n" builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} BUILD=${1} COVERAGE=${2} } # ============================================================================== @@ -121,7 +119,7 @@ buildChollaTests () { echo builtin cd $CHOLLA_ROOT - make -j TYPE=${CHOLLA_MAKE_TYPE} TEST=true + make --jobs=$(nproc) TYPE=${CHOLLA_MAKE_TYPE} TEST=true COVERAGE=${1} } # ============================================================================== @@ -205,6 +203,51 @@ runTests () } # ============================================================================== +# ============================================================================== +# This function generates a coverage report after the tests have been run. +# The final report is a website in bin/html_coverage_report_${CHOLLA_MAKE_TYPE} +chollaCoverage () +{ + # Setup the names of files that we will use + local base_file="bin/coverage_base_${CHOLLA_MAKE_TYPE}.info" + local test_file="bin/coverage_test_${CHOLLA_MAKE_TYPE}.info" + local combined_file="bin/coverage_combined_${CHOLLA_MAKE_TYPE}.info" + + # Generate the initial report with no coverage info. This is needed so that + # lcov knows about all the files, not just the ones that are tested + lcov --capture --initial --directory ${CHOLLA_ROOT}/src --output-file ${base_file} + + # Now we get the actual coverage information + lcov --capture --directory ${CHOLLA_ROOT}/src --output-file ${test_file} + + # Then combine the two coverage files so we know what changed, i.e. which + # lines were actually covered + lcov --add-tracefile ${base_file} --add-tracefile ${test_file} --output-file ${combined_file} + + # Extract data from only the files within CHOLLA_ROOT.
This should exclude any + # system or external libraries + lcov --extract ${combined_file} "${CHOLLA_ROOT}/*" --output-file ${combined_file} + + # exclude_patterns=('*-tests.cpp') # Remove traces of the tests themselves + # # --remove TRACEFILE PATTERN = remove all things associated with PATTERN in TRACEFILE + # lcov --remove ${combined_file} "${exclude_patterns[@]}" --output-file ${combined_file} + + # List the contents + lcov --list ${combined_file} + + # Generate HTML report + genhtml ${combined_file} --output-directory bin/html_coverage_report_${CHOLLA_MAKE_TYPE} + + # Combine all tracefiles together. Define the different make types then add + # the appropriate prefixes and suffixes. + # build_types=(cosmology disk dust gravity hydro mhd particles) + # build_types=("${build_types[@]/#/--add-trace bin/coverage_combined_}") + # build_types=("${build_types[@]/%/.info}") + # eval "build_types=(${build_types[@]})" + # lcov "${build_types[@]}" --output-file bin/full_coverage_report.info +} +# ============================================================================== + # ============================================================================== # Call all the functions required for setting up, building, and running tests # @@ -214,15 +257,20 @@ runTests () # argument is the value of COMPILER which does not occur for all setup scripts # \param[in] -g (optional) If set then download and build a local version of # GoogleTest to use instead of the machine default +# \param[in] -d (optional) Build Cholla in debug mode +# \param[in] -l (optional) Generate coverage reports when building and running Cholla buildAndRunTests () { # Unset BUILD_GTEST so that subsequent runs aren't tied to what previous runs # did unset BUILD_GTEST + BUILD_MODE='OPTIMIZE' + CODE_COVERAGE='false' + # Check arguments local OPTIND - while getopts "t:c:g" opt; do + while getopts "t:c:gdl" opt; do case $opt in t) # Set the make type MAKE_TYPE_ARG="-t ${OPTARG}" @@ -233,6 +281,12 @@
buildAndRunTests () g) # Build GoogleTest locally? BUILD_GTEST=true ;; + d) # Build the debug version of Cholla? + BUILD_MODE='DEBUG' + ;; + l) # Generate Code Coverage? + CODE_COVERAGE='true' + ;; \?) echo "Invalid option: -${OPTARG}" >&2 return 1 @@ -244,13 +298,27 @@ buildAndRunTests () esac done + # Run setup and check if it worked + setupTests $MAKE_TYPE_ARG $COMPILER_ARG + if [ $? -ne 0 ]; then + echo "setup failed" + return 1 + fi + + # Clean the cholla directory + builtin cd $CHOLLA_ROOT + make clobber + # Now we get to setting up and building - setupTests $MAKE_TYPE_ARG $COMPILER_ARG && \ if [[ -n $BUILD_GTEST ]]; then buildGoogleTest fi - buildCholla && \ - buildChollaTests && \ + buildCholla $BUILD_MODE $CODE_COVERAGE && \ + buildChollaTests $CODE_COVERAGE && \ runTests + + if [ $CODE_COVERAGE = "true" ]; then + chollaCoverage + fi } # ============================================================================== diff --git a/builds/setup.c3po.gcc.sh b/builds/setup.c3po.gcc.sh index 24fc6860d..d08360e6b 100755 --- a/builds/setup.c3po.gcc.sh +++ b/builds/setup.c3po.gcc.sh @@ -7,5 +7,5 @@ echo "mpicxx --version is: " mpicxx --version # export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp -foffload=disable" +export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 diff --git a/builds/setup.crc.gcc.sh b/builds/setup.crc.gcc.sh index 586dcbd00..7893b2875 100755 --- a/builds/setup.crc.gcc.sh +++ b/builds/setup.crc.gcc.sh @@ -9,5 +9,5 @@ echo "mpicxx --version is: " mpicxx --version # export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp -foffload=disable" +export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 diff --git a/builds/setup.frontier.cce.sh b/builds/setup.frontier.cce.sh index 4a22344d2..afb251680 100755 --- a/builds/setup.frontier.cce.sh +++ b/builds/setup.frontier.cce.sh @@ -15,3 +15,4 @@ export MPICH_GPU_SUPPORT_ENABLED=1 export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH} export CHOLLA_ENVSET=1 +export ROCFFT_RTC_CACHE_PATH=/dev/null diff
--git a/builds/setup.github.gcc.sh b/builds/setup.github.gcc.sh index fd001f23a..a959b3cea 100755 --- a/builds/setup.github.gcc.sh +++ b/builds/setup.github.gcc.sh @@ -4,5 +4,4 @@ # source ./setup.c3po.gcc.sh # export MPI_GPU="-DMPI_GPU" -export F_OFFLOAD="-fopenmp -foffload=disable" export CHOLLA_ENVSET=1 diff --git a/builds/setup.poplar.aomp.sh b/builds/setup.poplar.aomp.sh index 7b83e5ab7..d692711fe 100755 --- a/builds/setup.poplar.aomp.sh +++ b/builds/setup.poplar.aomp.sh @@ -1,12 +1,11 @@ #!/bin/bash module purge -module load craype-x86-naples craype-network-infiniband +module load craype-x86-naples craype-network-infiniband module load shared slurm module use /home/users/twhite/share/modulefiles module load ompi/4.0.4-rocm-3.9 hdf5 -export OMPI_CC=$(which clang) export OMPI_CXX=$(which clang) export CHOLLA_MACHINE=poplar.aomp diff --git a/builds/setup.summit.gcc.sh b/builds/setup.summit.gcc.sh index 81a99dd36..0f15f6bfe 100755 --- a/builds/setup.summit.gcc.sh +++ b/builds/setup.summit.gcc.sh @@ -6,6 +6,5 @@ #module load gcc/10.2.0 cuda/11.4.0 fftw hdf5 python module load gcc cuda fftw hdf5 python googletest/1.11.0 -#export F_OFFLOAD="-fopenmp -foffload=nvptx-none='-lm -Ofast'" -export F_OFFLOAD="-fopenmp -foffload=disable" +export F_OFFLOAD="-fopenmp" export CHOLLA_ENVSET=1 diff --git a/cholla-tests-data b/cholla-tests-data index 66d592821..da5c3a309 160000 --- a/cholla-tests-data +++ b/cholla-tests-data @@ -1 +1 @@ -Subproject commit 66d5928213b495c2fef61b0653b90a25ae3aa7cf +Subproject commit da5c3a309d5451fabdec27fd7942e6121bb9c277 diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile new file mode 100644 index 000000000..abecbe2c3 --- /dev/null +++ b/docker/cuda/Dockerfile @@ -0,0 +1,29 @@ +FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 +# Needs to be devel, not base or runtime, to have nvcc +# Ubuntu 22 is better than 18 because Ubuntu 22 default git is > 2.17 +# Github actions requires git > 2.17 so that cholla is pulled into a git repo +# Which is 
required for the Makefile +# With ubuntu 22.04 this grabs 2.34.1 + +RUN apt-get -y update && apt install -y \ + cmake \ + git \ + gnupg \ + libgtest-dev \ + libhdf5-serial-dev \ + libopenmpi-dev \ + openmpi-bin \ + software-properties-common \ + wget + +# Install Clang and Tools +RUN wget https://apt.llvm.org/llvm.sh && \ + chmod +x llvm.sh && \ + echo "\n" | ./llvm.sh 15 all && \ + find /usr/bin/ -name 'clang*15' | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c + +# Needed by Cholla Makefile +ENV CHOLLA_MACHINE=github +ENV CUDA_ROOT=/usr/local/cuda-11/ +ENV HDF5_ROOT=/usr/lib/x86_64-linux-gnu/hdf5/serial/ +ENV MPI_ROOT=/usr/lib/x86_64-linux-gnu/openmpi/ diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile new file mode 100644 index 000000000..3a7eb66ed --- /dev/null +++ b/docker/rocm/Dockerfile @@ -0,0 +1,36 @@ +FROM rocm/dev-ubuntu-20.04:5.2.3 + +# Avoid annoying cmake -> tzdata install prompt +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get -y update && apt-get -y install \ + cmake \ + git \ + gnupg \ + hipfft \ + libgtest-dev \ + libhdf5-serial-dev \ + libopenmpi-dev \ + openmpi-bin \ + rocfft \ + software-properties-common \ + wget + +# Needed to trick ROCm into thinking there's a GPU +RUN echo "gfx90a" | sudo tee --append $(hipconfig -R)/bin/target.lst + +# Install rocRand +RUN apt-get -y install rocrand + +# Install Clang and Tools +# RUN wget https://apt.llvm.org/llvm.sh && \ +# chmod +x llvm.sh && \ +# echo "\n" | ./llvm.sh 15 all && \ +# find /usr/bin/ -name 'clang*15' | sed -E 's/^(\/usr\/bin\/.*)(\-[0-9]*)$/ln -s -v \1\2 \1/' | xargs -d '\n' -n 1 bash -c + +# Needed by Cholla Makefile +ENV CHOLLA_MACHINE=github +ENV HIPCONFIG=/opt/rocm-5.2.3 +ENV ROCM_PATH=/opt/rocm-5.2.3 +ENV HDF5_ROOT=/usr/lib/x86_64-linux-gnu/hdf5/serial +ENV MPI_ROOT=/usr/lib/x86_64-linux-gnu/openmpi diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 9c3acb19f..4fedbe262 100644 --- a/docs/doxygen/Doxyfile +++ 
b/docs/doxygen/Doxyfile @@ -908,7 +908,7 @@ FILE_PATTERNS = *.c \ # be searched for input files as well. # The default value is: NO. -RECURSIVE = NO +RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a @@ -964,7 +964,7 @@ EXAMPLE_PATTERNS = * # irrespective of the value of the RECURSIVE tag. # The default value is: NO. -EXAMPLE_RECURSIVE = NO +EXAMPLE_RECURSIVE = YES # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the diff --git a/examples/1D/123.txt b/examples/1D/123.txt index 79a3b23a3..3f693baa6 100644 --- a/examples/1D/123.txt +++ b/examples/1D/123.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/Creasey_shock.txt b/examples/1D/Creasey_shock.txt index f7d98d7dc..59821a945 100644 --- a/examples/1D/Creasey_shock.txt +++ b/examples/1D/Creasey_shock.txt @@ -26,6 +26,10 @@ zlen=3.08567758e18 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/Shu_Osher.txt b/examples/1D/Shu_Osher.txt index 5d78eba7d..42d8a7ccb 100644 --- a/examples/1D/Shu_Osher.txt +++ b/examples/1D/Shu_Osher.txt @@ -19,15 +19,19 @@ gamma=1.4 # name of initial conditions init=Shu_Osher # domain properties -xmin=0.0 +xmin=-1.0 ymin=0.0 zmin=0.0 -xlen=1.0 +xlen=2.0 ylen=1.0 zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/noh_1D.txt b/examples/1D/noh_1D.txt index 3e9552295..d350c2479 100644 --- a/examples/1D/noh_1D.txt +++ b/examples/1D/noh_1D.txt @@ -14,7 +14,7 @@ tout=1.0 # time interval for output outstep=1.0 # name of initial 
conditions -init=Riemann_1D +init=Riemann # domain properties xmin=0.0 ymin=0.0 @@ -25,6 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/sound_wave.txt b/examples/1D/sound_wave.txt index c6555c662..13c6f8d05 100644 --- a/examples/1D/sound_wave.txt +++ b/examples/1D/sound_wave.txt @@ -25,23 +25,26 @@ zlen=1.0 # type of boundary conditions xl_bcnd=1 xu_bcnd=1 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/1D/square_wave.txt b/examples/1D/square_wave.txt index d33805c15..d22282a66 100644 --- a/examples/1D/square_wave.txt +++ b/examples/1D/square_wave.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=1 xu_bcnd=1 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/stationary.txt b/examples/1D/stationary.txt index 28941e868..746592847 100644 --- a/examples/1D/stationary.txt +++ b/examples/1D/stationary.txt @@ -27,6 +27,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/strong_shock.txt b/examples/1D/strong_shock.txt index 1726cf316..ff99eab23 100644 --- a/examples/1D/strong_shock.txt +++ b/examples/1D/strong_shock.txt @@ -25,6 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git 
a/examples/1D/test_3.txt b/examples/1D/test_3.txt index 60997270c..3eff8abcc 100644 --- a/examples/1D/test_3.txt +++ b/examples/1D/test_3.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/trac_pen.txt b/examples/1D/trac_pen.txt index 3c0081e5a..a24bf7167 100644 --- a/examples/1D/trac_pen.txt +++ b/examples/1D/trac_pen.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=1 xu_bcnd=1 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/1D/two_shocks.txt b/examples/1D/two_shocks.txt index a998bae46..c1ac4616a 100644 --- a/examples/1D/two_shocks.txt +++ b/examples/1D/two_shocks.txt @@ -26,6 +26,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 +yl_bcnd=0 +yu_bcnd=0 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/2D/Gresho.txt b/examples/2D/Gresho.txt index cc645431d..6595c5695 100644 --- a/examples/2D/Gresho.txt +++ b/examples/2D/Gresho.txt @@ -17,6 +17,8 @@ outstep=0.05 gamma=1.4 # name of initial conditions init=Gresho +# static gravity flag +custom_grav=1 # domain properties xmin=-0.5 ymin=-0.5 diff --git a/examples/2D/Noh_2D.txt b/examples/2D/Noh_2D.txt index 0e43af07d..5223983d5 100644 --- a/examples/2D/Noh_2D.txt +++ b/examples/2D/Noh_2D.txt @@ -29,6 +29,8 @@ xl_bcnd=2 xu_bcnd=4 yl_bcnd=2 yu_bcnd=4 +zl_bcnd=0 +zu_bcnd=0 custom_bcnd=noh # path to output directory outdir=./ diff --git a/examples/2D/Rayleigh_Taylor.txt b/examples/2D/Rayleigh_Taylor.txt index 3cf87dbea..919e654e1 100644 --- a/examples/2D/Rayleigh_Taylor.txt +++ b/examples/2D/Rayleigh_Taylor.txt @@ -17,6 +17,8 @@ outstep=0.05 gamma=1.4 # name of initial conditions init=Rayleigh_Taylor +#static gravity flag +custom_grav=2 # domain properties xmin=0.0 ymin=0.0 diff --git a/examples/2D/disk.txt b/examples/2D/disk.txt index 3dd0ce821..86397f6d9 100644 --- 
a/examples/2D/disk.txt +++ b/examples/2D/disk.txt @@ -17,6 +17,8 @@ outstep=2185.9 gamma=1.001 # name of initial conditions init=Disk_2D +# static gravity flag +custom_grav=3 # domain properties xmin=-20 ymin=-20 diff --git a/examples/2D/sod.txt b/examples/2D/sod.txt index 27df1f256..1f60eab77 100644 --- a/examples/2D/sod.txt +++ b/examples/2D/sod.txt @@ -27,6 +27,8 @@ xl_bcnd=3 xu_bcnd=3 yl_bcnd=3 yu_bcnd=3 +zl_bcnd=0 +zu_bcnd=0 # path to output directory outdir=./ diff --git a/examples/2D/sound_wave.txt b/examples/2D/sound_wave.txt index d69b1270f..109eb8050 100644 --- a/examples/2D/sound_wave.txt +++ b/examples/2D/sound_wave.txt @@ -27,24 +27,24 @@ xl_bcnd=1 xu_bcnd=1 yl_bcnd=1 yu_bcnd=1 +zl_bcnd=0 +zu_bcnd=0 # path to output directory -#outdir=outputs/ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/3D/Brio_and_Wu.txt b/examples/3D/Brio_and_Wu.txt index a742ae207..c1a9fe387 100644 --- a/examples/3D/Brio_and_Wu.txt +++ b/examples/3D/Brio_and_Wu.txt @@ -6,11 +6,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=256 # number of grid cells in the y dimension -ny=32 +ny=256 # number of grid cells in the z dimension -nz=32 +nz=256 # final output time tout=0.1 # time interval for output @@ -68,5 +68,5 @@ Bz_r=0.0 # location of initial discontinuity diaph=0.5 # value of gamma -gamma=2 +gamma=2.0 diff --git a/examples/3D/Dai_and_Woodward.txt b/examples/3D/Dai_and_Woodward.txt index 64c5351e6..a266cbb66 100644 --- a/examples/3D/Dai_and_Woodward.txt +++ b/examples/3D/Dai_and_Woodward.txt @@ -7,11 +7,11 @@ 
################################################ # number of grid cells in the x dimension -nx=32 +nx=256 # number of grid cells in the y dimension -ny=32 +ny=256 # number of grid cells in the z dimension -nz=32 +nz=256 # final output time tout=0.2 # time interval for output @@ -43,28 +43,28 @@ outdir=./ # density of left state rho_l=1.08 # velocity of left state -vx_l=0.0 -vy_l=0.0 -vz_l=0.0 +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 # pressure of left state -P_l=1.0 +P_l=0.95 # Magnetic field of the left state -Bx_l=14.17963081 -By_l=12.76166773 -Bz_l=7.0898154 +Bx_l=0.5641895835477563 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 # density of right state rho_r=1.0 # velocity of right state vx_r=0.0 vy_r=0.0 -vz_r=1.0 +vz_r=0.0 # pressure of right state -P_r=0.2 +P_r=1.0 # Magnetic field of the right state -Bx_r=14.17963081 -By_r=14.17963081 -Bz_r=7.0898154 +Bx_r=0.5641895835477563 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 # location of initial discontinuity diaph=0.5 diff --git a/examples/3D/KH_res_ind_3D.txt b/examples/3D/KH_res_ind_3D.txt index ab846867a..2ebe6cda0 100644 --- a/examples/3D/KH_res_ind_3D.txt +++ b/examples/3D/KH_res_ind_3D.txt @@ -10,7 +10,7 @@ ny=128 # number of grid cells in the z dimension nz=128 # final output time -tout=5.0 +tout=3.0 # time interval for output outstep=0.01 # value of gamma diff --git a/examples/3D/Ryu_and_Jones_1a.txt b/examples/3D/Ryu_and_Jones_1a.txt new file mode 100644 index 000000000..c0c73cced --- /dev/null +++ b/examples/3D/Ryu_and_Jones_1a.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 1a. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. 
This settings file is +# specifically for shock tube 1a +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.08 +# time interval for output +outstep=0.08 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=10.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=20.0 +# Magnetic field of the left state +Bx_l=1.4104739588693909 +By_l=1.4104739588693909 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=-10.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=1.4104739588693909 +By_r=1.4104739588693909 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/examples/3D/Ryu_and_Jones_4d.txt b/examples/3D/Ryu_and_Jones_4d.txt index 68fcbbbb3..6596c2b01 100644 --- a/examples/3D/Ryu_and_Jones_4d.txt +++ b/examples/3D/Ryu_and_Jones_4d.txt @@ -9,11 +9,11 @@ ################################################ # number of grid cells in the x dimension -nx=32 +nx=64 # number of grid cells in the y dimension -ny=32 +ny=64 # number of grid cells in the z dimension -nz=32 +nz=64 # final output time tout=0.16 # time interval for output diff --git a/examples/3D/Spherical_Collapse.txt b/examples/3D/Spherical_Collapse.txt index 8fad21920..739661216 100644 --- a/examples/3D/Spherical_Collapse.txt +++ b/examples/3D/Spherical_Collapse.txt @@ -32,6 +32,4 @@ yu_bcnd=1 zl_bcnd=1 zu_bcnd=1 # path to output 
directory -outdir=/data/groups/comp-astro/bruno/cosmo_sims/sphere_collapse/output_files/ -#outdir=/raid/bruno/data/cosmo_sims/cholla_pm/sphere_collapse/ -#outdir=/gpfs/alpine/scratch/bvilasen/ast149/sphere_collapse/output_files/ +outdir=./ diff --git a/examples/3D/Spherical_Overpressure.txt b/examples/3D/Spherical_Overpressure.txt index 7fec56a3b..0e77c4452 100644 --- a/examples/3D/Spherical_Overpressure.txt +++ b/examples/3D/Spherical_Overpressure.txt @@ -32,5 +32,4 @@ yu_bcnd=1 zl_bcnd=1 zu_bcnd=1 # path to output directory -#outdir=/gpfs/alpine/scratch/bvilasen/ast149/sphere_explosion/output_files/ -outdir=/raid/bruno/data/cosmo_sims/cholla_pm/sphere_explosion/ +outdir=./ diff --git a/examples/3D/Uniform.txt b/examples/3D/Uniform.txt index 84fd900f6..e08e76dba 100644 --- a/examples/3D/Uniform.txt +++ b/examples/3D/Uniform.txt @@ -32,4 +32,4 @@ yu_bcnd=1 zl_bcnd=1 zu_bcnd=1 # path to output directory -outdir=/raid/bruno/data/cosmo_sims/cholla_pm/uniform/ +outdir=./ diff --git a/examples/3D/advecting_field_loop.txt b/examples/3D/advecting_field_loop.txt new file mode 100644 index 000000000..eca9c382e --- /dev/null +++ b/examples/3D/advecting_field_loop.txt @@ -0,0 +1,55 @@ +# +# Parameter File for an MHD Advecting Field Loop as defined in +# [Gardiner & Stone 2008](https://ui.adsabs.harvard.edu/abs/2008JCoPh.227.4123G/abstract) +# + +################################################ +# number of grid cells in the x dimension +nx=128 +# number of grid cells in the y dimension +ny=128 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Advecting_Field_Loop +# domain properties +xmin=-0.5 +ymin=-0.5 +zmin=-1.0 +xlen=1.0 +ylen=1.0 +zlen=2.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for advecting field loop 
problem +# initial density +rho=1.0 +# velocity in the x direction +vx=1.0 +# velocity in the y direction +vy=1.0 +# velocity in the z direction +vz=2.0 +# initial pressure +P=1.0 +# amplitude of the loop/magnetic field background value +A=0.001 +# Radius of the Loop +radius=0.3 + +# value of gamma +gamma=1.666666666666667 + diff --git a/examples/3D/alfven_wave.txt b/examples/3D/alfven_wave.txt new file mode 100644 index 000000000..bfacbc968 --- /dev/null +++ b/examples/3D/alfven_wave.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD Alfven Wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/3) * [0, 0, +/-1, -/+2*sqrt(2), 0, -1, 2*sqrt(2), 0] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0 +rEigenVec_MomentumX=0 
+rEigenVec_MomentumY=0 +rEigenVec_MomentumZ=-1 +rEigenVec_Bx=0 +rEigenVec_By=0 +rEigenVec_Bz=1 +rEigenVec_E=0 diff --git a/examples/3D/circularly_polarized_alfven_wave.txt b/examples/3D/circularly_polarized_alfven_wave.txt new file mode 100644 index 000000000..193f1ac33 --- /dev/null +++ b/examples/3D/circularly_polarized_alfven_wave.txt @@ -0,0 +1,48 @@ +# +# Parameter File for the circularly polarized Alfven Wave +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) pages 4134-4135 +# for details. +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Circularly_Polarized_Alfven_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=3.0 +ylen=1.5 +zlen=1.5 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# Polarization. 1 = right polarized, -1 = left polarized +polarization=1.0 +# velocity in the x direction. 
0 for moving wave, -1 for standing wave +vx=0.0 +# pitch angle +pitch=0.72972765622696634 +# yaw angle +yaw=1.1071487177940904 +# value of gamma +gamma=1.666666666666667 diff --git a/examples/3D/constant.txt b/examples/3D/constant.txt index ca3b411e2..871fbb7b7 100644 --- a/examples/3D/constant.txt +++ b/examples/3D/constant.txt @@ -42,9 +42,9 @@ vz=0 # pressure P=1.380658e-5 # Magnetic Field -Bx=0.0 -By=0.0 -Bz=0.0 +Bx=1.0e-5 +By=2.0e-5 +Bz=3.0e-5 # value of gamma gamma=1.666666667 diff --git a/examples/3D/fast_magnetosonic.txt b/examples/3D/fast_magnetosonic.txt new file mode 100644 index 000000000..bc134a79a --- /dev/null +++ b/examples/3D/fast_magnetosonic.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD fast magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave. +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [6, +/-12, -/+4*sqrt(2), -/+2, 0, 8*sqrt(2), 4, 27] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in 
the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.4472135954999579 +rEigenVec_MomentumX=0.8944271909999159 +rEigenVec_MomentumY=-0.4472135954999579 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=0.8944271909999159 +rEigenVec_Bz=0.0 +rEigenVec_E=2.0124611797498106 diff --git a/examples/3D/mhd_blast.txt b/examples/3D/mhd_blast.txt new file mode 100644 index 000000000..5d078f674 --- /dev/null +++ b/examples/3D/mhd_blast.txt @@ -0,0 +1,61 @@ +# +# Parameter File for the MHD Blast wave +# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details. +# + +################################################ +# number of grid cells in the x dimension +nx=200 +# number of grid cells in the y dimension +ny=300 +# number of grid cells in the z dimension +nz=200 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=MHD_Spherical_Blast +# domain properties +xmin=-0.5 +ymin=-0.75 +zmin=-0.5 +xlen=1.0 +ylen=1.5 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for MHD Blast Wave problem + +# initial density +rho=1.0 +# velocity in the x direction +vx=0.0 +# velocity in the y direction +vy=0.0 +# velocity in the z direction +vz=0.0 +# initial pressure outside the blast zone +P=0.1 +# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo +P_blast=10.0 +# The radius of the blast zone +radius=0.1 +# magnetic field in the x direction. Equal to 1/sqrt(2) +Bx=0.70710678118654746 +# magnetic field in the y direction.
Equal to 1/sqrt(2) +By=0.70710678118654746 +# magnetic field in the z direction +Bz=0.0 + +# value of gamma +gamma=1.666666666666667 diff --git a/examples/3D/mhd_contact_wave.txt b/examples/3D/mhd_contact_wave.txt new file mode 100644 index 000000000..0ff7e7989 --- /dev/null +++ b/examples/3D/mhd_contact_wave.txt @@ -0,0 +1,71 @@ +# +# Parameter File for MHD contact wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/2) * [2, +/-2, 0, 0, 0, 0, 0, 1] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=1.0 +# time interval for output +outstep=1.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=1 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=1.0 +rEigenVec_MomentumX=1.0 +rEigenVec_MomentumY=0.0 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=0.0 +rEigenVec_Bz=0.0 +rEigenVec_E=0.5 diff --git a/examples/3D/orszag_tang_vortex.txt 
b/examples/3D/orszag_tang_vortex.txt new file mode 100644 index 000000000..9d8050073 --- /dev/null +++ b/examples/3D/orszag_tang_vortex.txt @@ -0,0 +1,42 @@ +# +# Parameter File for the Orszag-Tang Vortex +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) +# + +################################################ +# number of grid cells in the x dimension +nx=128 +# number of grid cells in the y dimension +ny=128 +# number of grid cells in the z dimension +nz=128 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Orszag_Tang_Vortex +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for Orszag-Tang Vortex. This problem is defined for a specific set +# of initial conditions which have been hard coded into the initial conditions +# function. 
The only thing that needs to be set here is the adiabatic index + +# value of gamma +gamma=1.666666666666667 diff --git a/examples/3D/slow_magnetosonic.txt b/examples/3D/slow_magnetosonic.txt new file mode 100644 index 000000000..960952b5f --- /dev/null +++ b/examples/3D/slow_magnetosonic.txt @@ -0,0 +1,72 @@ +# +# Parameter File for MHD slow magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=256 +# number of grid cells in the y dimension +ny=256 +# number of grid cells in the z dimension +nz=256 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.8944271909999159 +rEigenVec_MomentumX=0.4472135954999579 +rEigenVec_MomentumY=0.8944271909999159 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=-0.4472135954999579 +rEigenVec_Bz=0.0
+rEigenVec_E=0.6708203932499369 + diff --git a/examples/3D/sound_wave.txt b/examples/3D/sound_wave.txt index 0f3866226..6c226c0ab 100644 --- a/examples/3D/sound_wave.txt +++ b/examples/3D/sound_wave.txt @@ -34,18 +34,17 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-4 # value of gamma gamma=1.666666666666667 - diff --git a/examples/scripts/parameter_file.txt b/examples/scripts/parameter_file.txt index 48652cfe1..1cf3a08ad 100644 --- a/examples/scripts/parameter_file.txt +++ b/examples/scripts/parameter_file.txt @@ -10,8 +10,7 @@ ny=256 # number of grid cells in the z dimension nz=256 # final output time -tout=3000 -#tout=3 +tout=200 # time interval for output outstep=100 n_hydro=1 @@ -23,16 +22,15 @@ n_rotated_projection=5 gamma=1.66666667 # name of initial conditions init=Disk_3D_particles -bc_potential_type=1 #init=Disk_3D -#nfull=100 +bc_potential_type=1 # domain properties -xmin=-15 -ymin=-15 -zmin=-15 -xlen=30 -ylen=30 -zlen=30 +xmin=-2 +ymin=-2 +zmin=-2 +xlen=4 +ylen=4 +zlen=4 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 @@ -52,3 +50,4 @@ flag_delta=2 ddelta_dt=-0.001 # path to output directory outdir=./raw/ +prng_seed=42 diff --git a/extras/submit_job_lux b/extras/submit_job_lux deleted file mode 100644 index 39c919f2d..000000000 --- a/extras/submit_job_lux +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=cholla_test # Job name -#SBATCH --partition=gpuq # queue for job submission -#SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL) -#SBATCH --mail-user=brvillas@ucsc.edu # Where to send mail -#SBATCH --ntasks=8 # Number of MPI ranks -#SBATCH --nodes=1 # Number of nodes -#SBATCH --ntasks-per-node=8 
# How many tasks on each node -#SBATCH --time=00:10:00 # Time limit hrs:min:sec -#SBATCH --output=cuda_test_%j.log # Standard output and error log - -pwd; hostname; date - -echo "Running program on $SLURM_JOB_NUM_NODES nodes with $SLURM_NTASKS total tasks, with each node getting $SLURM_NTASKS_PER_NODE running on cores." - -module load hdf5 -module load openmpi/4.0.1-cuda -module load cuda10.1/10.1.168 - -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/brvillas/code/grackle/lib - - -cd /home/brvillas/cholla - -mpirun -N 1 --map-by ppr:8:node ./cholla examples/3D/Spherical_Collapse.txt - -date \ No newline at end of file diff --git a/extras/submit_job_summit.lsf b/extras/submit_job_summit.lsf deleted file mode 100644 index 5c2552c0a..000000000 --- a/extras/submit_job_summit.lsf +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -# Begin LSF Directives -#BSUB -P AST149 -#BSUB -W 0:10 -#BSUB -nnodes 2 -#BSUB -J sphere_256 -#BSUB -o sphere_256.o%J -#BSUB -e sphere_256.e%J -#BSUB -alloc_flags "smt4" - -module load hdf5 -module load cuda - -export WORK_DIR=$MEMBERWORK/ast149/sphere_explosion - -cd $MEMBERWORK/ast149/cholla -date -#export OMP_NUM_THREADS=10 -jsrun -n 8 -a 1 -c 7 -g 1 -r 4 -l CPU-CPU -d packed -b packed:7 ./cholla examples/3D/Spherical_Overpressure.txt > $WORK_DIR/run_output.log |sort diff --git a/python_scripts/README.md b/python_scripts/README.md index 5a462e8c1..acda923b7 100644 --- a/python_scripts/README.md +++ b/python_scripts/README.md @@ -5,15 +5,8 @@ You will likely develop more customized, robust, and flexible scripts for your o These simple scripts here are intended to help you understand the basics of the generated data from Cholla. ## Merging HDF5 files -Multi-processor runs generate HDF5 files per-timestep per-processor. -To treat each timestep together we want to merge those per-processor HDF5 files. 
-| Script | Concatenate | -| ------ | ----------- | -`cat_dset_3d.py` | 3D HDF5 datasets -`cat_projection.py` | The on-axis projection data created when the -DPROJECTION flag is turned on -`cat_rotated_projection.py` | The rotated projection data created when the -DROTATED_PROJECTION flag is turned on -`cat_slice.py` | The on-axis slice data created when the -DSLICES flag is turned on +Multi-processor runs generate HDF5 files per-timestep per-processor. Merging these per-process outputs into a single file can be done with the concatenation scripts detailed in the "Outputs" section of the wiki. ## Plotting data We here present simple Python matplotlib-based scripts to plot density, velocity, energy, and pressure. diff --git a/python_scripts/cat_dset_3D.py b/python_scripts/cat_dset_3D.py deleted file mode 100755 index 0c6d4b3ad..000000000 --- a/python_scripts/cat_dset_3D.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# Example file for concatenating 3D hdf5 datasets - -import h5py -import numpy as np - -ns = 0 -ne = 0 -n_proc = 16 # number of processors that did the calculations -istart = 0*n_proc -iend = 1*n_proc -dnamein = './hdf5/raw/' -dnameout = './hdf5/' -DE = 0 - -# loop over outputs -for n in range(ns, ne+1): - - # loop over files for a given output - for i in range(istart, iend): - - # open the output file for writing (don't overwrite if exists) - fileout = h5py.File(dnameout+str(n)+'.h5', 'a') - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] =
[head['n_step'][0]] - - units = ['time_unit', 'mass_unit', 'length_unit', 'energy_unit', 'velocity_unit', 'density_unit'] - for unit in units: - fileout.attrs[unit] = [head[unit][0]] - - d = fileout.create_dataset("density", (nx, ny, nz), chunks=True) - mx = fileout.create_dataset("momentum_x", (nx, ny, nz), chunks=True) - my = fileout.create_dataset("momentum_y", (nx, ny, nz), chunks=True) - mz = fileout.create_dataset("momentum_z", (nx, ny, nz), chunks=True) - E = fileout.create_dataset("Energy", (nx, ny, nz), chunks=True) - if (DE): - GE = fileout.create_dataset("GasEnergy", (nx, ny, nz), chunks=True) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - fileout['density'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['density'] - fileout['momentum_x'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_x'] - fileout['momentum_y'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_y'] - fileout['momentum_z'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['momentum_z'] - fileout['Energy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['Energy'] - if (DE): - fileout['GasEnergy'][xs:xs+nxl,ys:ys+nyl,zs:zs+nzl] = filein['GasEnergy'] - - filein.close() - - fileout.close() diff --git a/python_scripts/cat_particles.py b/python_scripts/cat_particles.py deleted file mode 100644 index 03cbcd71c..000000000 --- a/python_scripts/cat_particles.py +++ /dev/null @@ -1,91 +0,0 @@ -# Example file for concatenating particle data - -import h5py -import numpy as np - -ns = 0 -ne = 300 -n_procs = 4 # number of processors that did the cholla calculation -dnamein = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-12.49-356588-SOR_ONLY_PARTICLES_DISK/raw/' -dnameout = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-12.49-356588-SOR_ONLY_PARTICLES_DISK/particles_cat/' - 
-# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_particles.h5', 'w') - - if (n % 10 == 0): print(str(n)) - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_particles.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['velocity_unit'] = head['velocity_unit'] - fileout.attrs['length_unit'] = head['length_unit'] - fileout.attrs['particle_mass'] = head['particle_mass'] - fileout.attrs['density_unit'] = head['density_unit'] - - x = np.array([]) - y = np.array([]) - z = np.array([]) - vx = np.array([]) - vy = np.array([]) - vz = np.array([]) - particle_ids = np.array([]) - density = np.zeros((nx, ny, nz)) - n_total_particles = 0 - - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - n_total_particles += head['n_particles_local'] - density[xs:xs+nxl, ys:ys+nyl, zs:zs+nzl] += filein['density'] - x = np.append(x, filein['pos_x']) - y = np.append(y, filein['pos_y']) - z = np.append(z, filein['pos_z']) - vx = np.append(vx, filein['vel_x']) - vy = np.append(vy, filein['vel_y']) - vz = np.append(vz, filein['vel_z']) - particle_ids = np.append(particle_ids, filein['particle_IDs']) - - filein.close() - - # 
write out the new datasets - fileout.create_dataset('x', data=x) - fileout.create_dataset('y', data=y) - fileout.create_dataset('z', data=z) - fileout.create_dataset('vx', data=vx) - fileout.create_dataset('vy', data=vy) - fileout.create_dataset('vz', data=vz) - fileout.create_dataset('particle_ids', data=particle_ids) - fileout.create_dataset('density', data=density) - fileout.attrs['n_total_particles'] = n_total_particles - - fileout.close() diff --git a/python_scripts/cat_projection.py b/python_scripts/cat_projection.py deleted file mode 100755 index 29b56a416..000000000 --- a/python_scripts/cat_projection.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 -# Example file for concatenating on-axis projection data -# created when the -DPROJECTION flag is turned on - -import h5py -import numpy as np - -ns = 0 -ne = 0 -n_procs = 16 # number of processors that did the cholla calculation -dnamein = './hdf5/raw/' -dnameout = './hdf5/' - -# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_proj.h5', 'w') - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_proj.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['dims'] = [nx, ny, nz] - fileout.attrs['gamma'] = [head['gamma'][0]] - fileout.attrs['t'] = [head['t'][0]] - fileout.attrs['dt'] = [head['dt'][0]] - fileout.attrs['n_step'] = [head['n_step'][0]] - - dxy = np.zeros((nx,ny)) - dxz = np.zeros((nx,nz)) - Txy = np.zeros((nx,ny)) - Txz = np.zeros((nx,nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = 
head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - dxy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] - dxz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] - Txy[xs:xs+nxl,ys:ys+nyl] += filein['T_xy'] - Txz[xs:xs+nxl,zs:zs+nzl] += filein['T_xz'] - - filein.close() - - # write out the new datasets - fileout.create_dataset('d_xy', data=dxy) - fileout.create_dataset('d_xz', data=dxz) - fileout.create_dataset('T_xy', data=Txy) - fileout.create_dataset('T_xz', data=Txz) - - fileout.close() diff --git a/python_scripts/cat_rotated_projection.py b/python_scripts/cat_rotated_projection.py deleted file mode 100755 index 6e769ce55..000000000 --- a/python_scripts/cat_rotated_projection.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -# Example file for concatenating rotated projection data -# created when the -DROTATED_PROJECTION flag is turned on - -import h5py -import numpy as np - -ns = 0 -ne = 0 -n_procs = 16 # number of processors that did the cholla calculation -dnamein = './hdf5/raw/' -dnameout = './hdf5/' - -# loop over the output times -for n in range(ns, ne+1): - - # open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_rot_proj.h5', 'w') - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_rot_proj.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the arrays to hold the output data - if (i == 0): - nxr = int(head['nxr']) - nzr = int(head['nzr']) - Lx = head['Lx'] - Lz = head['Lz'] - delta = head['delta'] - theta = head['theta'] - phi = head['phi'] - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - fileout.attrs['nxr'] = nxr - fileout.attrs['nzr'] = nzr - fileout.attrs['Lx'] = Lx - fileout.attrs['Lz'] = Lz - fileout.attrs['delta'] 
= delta - fileout.attrs['theta'] = theta - fileout.attrs['phi'] = phi - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - - d_xzr = np.zeros((nxr, nzr)) - vx_xzr = np.zeros((nxr, nzr)) - vy_xzr = np.zeros((nxr, nzr)) - vz_xzr = np.zeros((nxr, nzr)) - T_xzr = np.zeros((nxr, nzr)) - - # write data from individual processor file to - # correct location in concatenated file - nx_min = int(head['nx_min']) - nx_max = int(head['nx_max']) - nz_min = int(head['nz_min']) - nz_max = int(head['nz_max']) - - d_xzr[nx_min:nx_max,nz_min:nz_max] += filein['d_xzr'][:] - vx_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vx_xzr'][:] - vy_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vy_xzr'][:] - vz_xzr[nx_min:nx_max,nz_min:nz_max] += filein['vz_xzr'][:] - T_xzr[nx_min:nx_max,nz_min:nz_max] += filein['T_xzr'][:] - - filein.close() - - # write out the new datasets - fileout.create_dataset("d_xzr", data=d_xzr) - fileout.create_dataset("vx_xzr", data=vx_xzr) - fileout.create_dataset("vy_xzr", data=vy_xzr) - fileout.create_dataset("vz_xzr", data=vz_xzr) - fileout.create_dataset("T_xzr", data=T_xzr) - - fileout.close() - - - diff --git a/python_scripts/cat_slice.py b/python_scripts/cat_slice.py deleted file mode 100644 index 7b6d15e12..000000000 --- a/python_scripts/cat_slice.py +++ /dev/null @@ -1,130 +0,0 @@ -# Example file for concatenating on-axis slice data -# created when the -DSLICES flag is turned on - -import h5py -import numpy as np - -ns = 0 -ne = 2 -n_procs = 4 # number of processors that did the cholla calculation -dnamein = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-14.17-357075-SOR_HYDRO_DISK/raw/' -dnameout = '/gpfs/alpine/proj-shared/csc380/orlandow/o_cholla/out.21Sep20-Mon-14.17-357075-SOR_HYDRO_DISK/catted_files' - -DE = True # set to True if Dual Energy flag was used -SCALAR = False # set to True if Scalar was used - -# loop over the output times -for n in range(ns, ne+1): - - # 
open the output file for writing - fileout = h5py.File(dnameout+str(n)+'_slice.h5', 'w') - - # loop over files for a given output time - for i in range(0, n_procs): - - # open the input file for reading - filein = h5py.File(dnamein+str(n)+'_slice.h5.'+str(i), 'r') - # read in the header data from the input file - head = filein.attrs - - # if it's the first input file, write the header attributes - # and create the datasets in the output file - if (i == 0): - gamma = head['gamma'] - t = head['t'] - dt = head['dt'] - n_step = head['n_step'] - nx = head['dims'][0] - ny = head['dims'][1] - nz = head['dims'][2] - fileout.attrs['gamma'] = gamma - fileout.attrs['t'] = t - fileout.attrs['dt'] = dt - fileout.attrs['n_step'] = n_step - fileout.attrs['dims'] = [nx, ny, nz] - - d_xy = np.zeros((nx,ny)) - d_xz = np.zeros((nx,nz)) - d_yz = np.zeros((ny,nz)) - mx_xy = np.zeros((nx,ny)) - mx_xz = np.zeros((nx,nz)) - mx_yz = np.zeros((ny,nz)) - my_xy = np.zeros((nx,ny)) - my_xz = np.zeros((nx,nz)) - my_yz = np.zeros((ny,nz)) - mz_xy = np.zeros((nx,ny)) - mz_xz = np.zeros((nx,nz)) - mz_yz = np.zeros((ny,nz)) - E_xy = np.zeros((nx,ny)) - E_xz = np.zeros((nx,nz)) - E_yz = np.zeros((ny,nz)) - if DE: - GE_xy = np.zeros((nx,ny)) - GE_xz = np.zeros((nx,nz)) - GE_yz = np.zeros((ny,nz)) - if SCALAR: - scalar_xy = np.zeros((nx,ny)) - scalar_xz = np.zeros((nx,nz)) - scalar_yz = np.zeros((ny,nz)) - - # write data from individual processor file to - # correct location in concatenated file - nxl = head['dims_local'][0] - nyl = head['dims_local'][1] - nzl = head['dims_local'][2] - xs = head['offset'][0] - ys = head['offset'][1] - zs = head['offset'][2] - - d_xy[xs:xs+nxl,ys:ys+nyl] += filein['d_xy'] - d_xz[xs:xs+nxl,zs:zs+nzl] += filein['d_xz'] - d_yz[ys:ys+nyl,zs:zs+nzl] += filein['d_yz'] - mx_xy[xs:xs+nxl,ys:ys+nyl] += filein['mx_xy'] - mx_xz[xs:xs+nxl,zs:zs+nzl] += filein['mx_xz'] - mx_yz[ys:ys+nyl,zs:zs+nzl] += filein['mx_yz'] - my_xy[xs:xs+nxl,ys:ys+nyl] += filein['my_xy'] - 
my_xz[xs:xs+nxl,zs:zs+nzl] += filein['my_xz'] - my_yz[ys:ys+nyl,zs:zs+nzl] += filein['my_yz'] - mz_xy[xs:xs+nxl,ys:ys+nyl] += filein['mz_xy'] - mz_xz[xs:xs+nxl,zs:zs+nzl] += filein['mz_xz'] - mz_yz[ys:ys+nyl,zs:zs+nzl] += filein['mz_yz'] - E_xy[xs:xs+nxl,ys:ys+nyl] += filein['E_xy'] - E_xz[xs:xs+nxl,zs:zs+nzl] += filein['E_xz'] - E_yz[ys:ys+nyl,zs:zs+nzl] += filein['E_yz'] - if DE: - GE_xy[xs:xs+nxl,ys:ys+nyl] += filein['GE_xy'] - GE_xz[xs:xs+nxl,zs:zs+nzl] += filein['GE_xz'] - GE_yz[ys:ys+nyl,zs:zs+nzl] += filein['GE_yz'] - if SCALAR: - scalar_xy[xs:xs+nxl,ys:ys+nyl] += filein['scalar_xy'] - scalar_xz[xs:xs+nxl,zs:zs+nzl] += filein['scalar_xz'] - scalar_yz[ys:ys+nyl,zs:zs+nzl] += filein['scalar_yz'] - - filein.close() - - # wrte out the new datasets - fileout.create_dataset('d_xy', data=d_xy) - fileout.create_dataset('d_xz', data=d_xz) - fileout.create_dataset('d_yz', data=d_yz) - fileout.create_dataset('mx_xy', data=mx_xy) - fileout.create_dataset('mx_xz', data=mx_xz) - fileout.create_dataset('mx_yz', data=mx_yz) - fileout.create_dataset('my_xy', data=my_xy) - fileout.create_dataset('my_xz', data=my_xz) - fileout.create_dataset('my_yz', data=my_yz) - fileout.create_dataset('mz_xy', data=mz_xy) - fileout.create_dataset('mz_xz', data=mz_xz) - fileout.create_dataset('mz_yz', data=mz_yz) - fileout.create_dataset('E_xy', data=E_xy) - fileout.create_dataset('E_xz', data=E_xz) - fileout.create_dataset('E_yz', data=E_yz) - if DE: - fileout.create_dataset('GE_xy', data=GE_xy) - fileout.create_dataset('GE_xz', data=GE_xz) - fileout.create_dataset('GE_yz', data=GE_yz) - if SCALAR: - fileout.create_dataset('scalar_xy', data=scalar_xy) - fileout.create_dataset('scalar_xz', data=scalar_xz) - fileout.create_dataset('scalar_yz', data=scalar_yz) - - fileout.close() diff --git a/python_scripts/concat_2d_data.py b/python_scripts/concat_2d_data.py new file mode 100755 index 000000000..9c4e0dd86 --- /dev/null +++ b/python_scripts/concat_2d_data.py @@ -0,0 +1,271 @@ +#!/usr/bin/env 
python3 +""" +Python script for concatenating 2D hdf5 datasets for when -DSLICES, +-DPROJECTION, or -DROTATED_PROJECTION is turned on in Cholla. Includes a CLI for +concatenating Cholla HDF5 datasets and can be imported into other scripts where +the `concat_2d_dataset` function can be used to concatenate the HDF5 files. + +Generally the easiest way to import this script is to add the `python_scripts` +directory to your python path in your script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import concat_2d_data +``` +""" + +import h5py +import pathlib +import numpy as np + +import concat_internals + +# ============================================================================== +def concat_2d_dataset(output_directory: pathlib.Path, + num_processes: int, + output_number: int, + dataset_kind: str, + build_source_path, + concat_xy: bool = True, + concat_yz: bool = True, + concat_xz: bool = True, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None, + chunking = None) -> None: + """Concatenate 2D HDF5 Cholla datasets. i.e. take the single files + generated per process and concatenate them into a single, large file. This + function concatenates a single output time and can be called multiple times, + potentially in parallel, to concatenate multiple output times. + + Parameters + ---------- + output_directory : pathlib.Path + The directory containing the new concatenated files + num_processes : int + The number of ranks that Cholla was run with + output_number : int + The output number to concatenate + dataset_kind : str + The type of 2D dataset to concatenate. Can be 'slice', 'proj', or 'rot_proj'. + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. + concat_xy : bool + If True then concatenate the XY slices/projections. Defaults to True. 
+ concat_yz : bool + If True then concatenate the YZ slices/projections. Defaults to True. + concat_xz : bool + If True then concatenate the XZ slices/projections. Defaults to True. + skip_fields : list + List of fields to skip concatenating. Defaults to []. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type : str + What kind of compression to use on the output data. Defaults to None. + compression_options : str + What compression settings to use if compressing. Defaults to None. + chunking : bool or tuple + Whether or not to use chunking and the chunk size. Defaults to None. + output_directory: pathlib.Path : + + num_processes: int : + + output_number: int : + + dataset_kind: str : + + concat_xy: bool : + (Default value = True) + concat_yz: bool : + (Default value = True) + concat_xz: bool : + (Default value = True) + skip_fields: list : + (Default value = []) + destination_dtype: np.dtype : + (Default value = None) + compression_type: str : + (Default value = None) + compression_options: str : + (Default value = None) + + Returns + ------- + + """ + + # Error checking + assert num_processes > 1, 'num_processes must be greater than 1' + assert output_number >= 0, 'output_number must be greater than or equal to 0' + assert dataset_kind in ['slice', 'proj', 'rot_proj'], '`dataset_kind` can only be one of "slice", "proj", "rot_proj".' 
+ + # Open destination file + destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_{dataset_kind}.h5') + + # Setup the destination file + with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file: + # Copy over header + destination_file = concat_internals.copy_header(source_file, destination_file) + + # Get a list of all datasets in the source file + datasets_to_copy = list(source_file.keys()) + + # Filter the datasets to only include those that need to be copied + if not concat_xy: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xy' in dataset] + if not concat_yz: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'yz' in dataset] + if not concat_xz: + datasets_to_copy = [dataset for dataset in datasets_to_copy if not 'xz' in dataset] + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + # Create the datasets in the destination file + zero_array = np.zeros(1) + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype + + dataset_shape = __get_2d_dataset_shape(source_file, dataset) + + # Create array to initialize data to zero, this is required for projections + if zero_array.shape != dataset_shape: + zero_array = np.zeros(dataset_shape) + + destination_file.create_dataset(name=dataset, + shape=dataset_shape, + data=zero_array, + dtype=dtype, + chunks=chunking, + compression=compression_type, + compression_opts=compression_options) + + # Copy data + for rank in range(num_processes): + # Open source file + source_file = h5py.File(build_source_path(proc_id = rank, nfile = output_number), 'r') + + # Loop through and copy datasets + for dataset in datasets_to_copy: + # Determine locations and shifts for writing + (i0_start, i0_end, i1_start, i1_end), file_in_slice = __write_bounds_2d_dataset(source_file, dataset) + + # If this is a slice dataset we 
can skip loading the source file if that + # file isn't in the slice + if dataset_kind == 'slice' and not file_in_slice: + continue + + # Copy the data, the summation is required for projections but not slices + destination_file[dataset][i0_start:i0_end, + i1_start:i1_end] += source_file[dataset] + + # Now that the copy is done we close the source file + source_file.close() + + # Close destination file now that it is fully constructed + destination_file.close() +# ============================================================================== + +# ============================================================================== +def __get_2d_dataset_shape(source_file: h5py.File, dataset: str) -> tuple: + """Determine the shape of the full 2D dataset + + Args: + source_file (h5py.File): The source file the get the shape information from + dataset (str): The dataset to get the shape of + + Raises: + ValueError: If the dataset name isn't a 2D dataset name + + Returns: + tuple: The dimensions of the dataset + """ + + if 'xzr' in dataset: + return (source_file.attrs['nxr'][0], source_file.attrs['nzr'][0]) + + nx, ny, nz = source_file.attrs['dims'] + if 'xy' in dataset: + dimensions = (nx, ny) + elif 'yz' in dataset: + dimensions = (ny, nz) + elif 'xz' in dataset: + dimensions = (nx, nz) + else: + raise ValueError(f'Dataset "{dataset}" is not a slice.') + + return dimensions +# ============================================================================== + +# ============================================================================== +def __write_bounds_2d_dataset(source_file: h5py.File, dataset: str) -> tuple: + """Determine the bounds of the concatenated file to write to + + Args: + source_file (h5py.File): The source file to read from + dataset (str): The name of the dataset to read from the source file + + Raises: + ValueError: If the dataset name isn't a 2D dataset name + + Returns: + tuple: The write bounds for the concatenated file to be used like + 
`output_file[dataset][return[0]:return[1], return[2]:return[3]]` followed by a bool to indicate if the file is + in the slice if concatenating a slice + """ + + if 'xzr' in dataset: + return (source_file.attrs['nx_min'][0], source_file.attrs['nx_max'][0], + source_file.attrs['nz_min'][0], source_file.attrs['nz_max'][0]), True + + nx, ny, nz = source_file.attrs['dims'] + nx_local, ny_local, nz_local = source_file.attrs['dims_local'] + x_start, y_start, z_start = source_file.attrs['offset'] + + if 'xy' in dataset: + file_in_slice = z_start <= nz//2 <= z_start+nz_local + bounds = (x_start, x_start+nx_local, y_start, y_start+ny_local) + elif 'yz' in dataset: + file_in_slice = x_start <= nx//2 <= x_start+nx_local + bounds = (y_start, y_start+ny_local, z_start, z_start+nz_local) + elif 'xz' in dataset: + file_in_slice = y_start <= ny//2 <= y_start+ny_local + bounds = (x_start, x_start+nx_local, z_start, z_start+nz_local) + else: + raise ValueError(f'Dataset "{dataset}" is not a slice or projection.') + + return bounds, file_in_slice +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + + cli = concat_internals.common_cli() + cli.add_argument('-d', '--dataset-kind', type=str, required=True, help='What kind of 2D dataset to concatnate. 
Options are "slice", "proj", and "rot_proj"') + cli.add_argument('--disable-xy', default=True, action='store_false', help='Disables concating the XY datasets.') + cli.add_argument('--disable-yz', default=True, action='store_false', help='Disables concating the YZ datasets.') + cli.add_argument('--disable-xz', default=True, action='store_false', help='Disables concating the XZ datasets.') + args = cli.parse_args() + + build_source_path = concat_internals.get_source_path_builder( + source_directory = args.source_directory, + pre_extension_suffix = f'_{args.dataset_kind}', + known_output_snap = args.concat_outputs[0]) + + # Perform the concatenation + for output in args.concat_outputs: + concat_2d_dataset(output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + dataset_kind=args.dataset_kind, + build_source_path = build_source_path, + concat_xy=args.disable_xy, + concat_yz=args.disable_yz, + concat_xz=args.disable_xz, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts, + chunking=args.chunking) + + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') diff --git a/python_scripts/concat_3d_data.py b/python_scripts/concat_3d_data.py new file mode 100755 index 000000000..1d5ba8228 --- /dev/null +++ b/python_scripts/concat_3d_data.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +""" +Python script for concatenating 3D hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be +imported into other scripts where the `concat_3d_dataset` function can be used to concatenate the datasets. 
+ +Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your +script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import concat_3d_data +``` +""" + +import h5py +import numpy as np +import pathlib + +import concat_internals + +# ============================================================================== +def concat_3d_dataset(output_directory: pathlib.Path, + num_processes: int, + output_number: int, + build_source_path, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None, + chunking = None) -> None: + """Concatenate a single 3D HDF5 Cholla dataset. i.e. take the single files + generated per process and concatenate them into a single, large file. + + Parameters + ---------- + output_directory : pathlib.Path + The directory containing the new concatenated files + num_processes : int + The number of ranks that Cholla was run with + output_number : int + The output number to concatenate + skip_fields : list + List of fields to skip concatenating. Defaults to []. + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets. + compression_type : str + What kind of compression to use on the output data. Defaults to None. + compression_options : str + What compression settings to use if compressing. Defaults to None. + chunking : bool or tuple + Whether or not to use chunking and the chunk size. Defaults to None. 
+ output_directory: pathlib.Path : + + num_processes: int : + + output_number: int : + + skip_fields: list : + (Default value = []) + destination_dtype: np.dtype : + (Default value = None) + compression_type: str : + (Default value = None) + compression_options: str : + (Default value = None) + + Returns + ------- + + """ + + # Error checking + assert num_processes > 1, 'num_processes must be greater than 1' + assert output_number >= 0, 'output_number must be greater than or equal to 0' + + # Open the output file for writing + destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}.h5') + + # Setup the output file + with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file: + # Copy header data + destination_file = concat_internals.copy_header(source_file, destination_file) + + # Create the datasets in the output file + datasets_to_copy = list(source_file.keys()) + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype + + data_shape = source_file.attrs['dims'] + + if dataset == 'magnetic_x': data_shape[0] += 1 + if dataset == 'magnetic_y': data_shape[1] += 1 + if dataset == 'magnetic_z': data_shape[2] += 1 + + destination_file.create_dataset(name=dataset, + shape=data_shape, + dtype=dtype, + chunks=chunking, + compression=compression_type, + compression_opts=compression_options) + + # loop over files for a given output + for i in range(0, num_processes): + # open the input file for reading + source_file = h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') + + # Compute the offset slicing + nx_local, ny_local, nz_local = source_file.attrs['dims_local'] + x_start, y_start, z_start = source_file.attrs['offset'] + x_end, y_end, z_end = x_start+nx_local, y_start+ny_local, z_start+nz_local + + # write data from 
individual processor file to correct location in concatenated file + for dataset in datasets_to_copy: + magnetic_offset = [0,0,0] + if dataset == 'magnetic_x': magnetic_offset[0] = 1 + if dataset == 'magnetic_y': magnetic_offset[1] = 1 + if dataset == 'magnetic_z': magnetic_offset[2] = 1 + + destination_file[dataset][x_start:x_end+magnetic_offset[0], + y_start:y_end+magnetic_offset[1], + z_start:z_end+magnetic_offset[2]] = source_file[dataset] + + # Now that the copy is done we close the source file + source_file.close() + + # Close destination file now that it is fully constructed + destination_file.close() +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + + cli = concat_internals.common_cli() + args = cli.parse_args() + + build_source_path = concat_internals.get_source_path_builder( + source_directory = args.source_directory, + pre_extension_suffix = '', + known_output_snap = args.concat_outputs[0]) + + # Perform the concatenation + for output in args.concat_outputs: + concat_3d_dataset(output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + build_source_path = build_source_path, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts, + chunking=args.chunking) + + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') diff --git a/python_scripts/concat_internals.py b/python_scripts/concat_internals.py new file mode 100755 index 000000000..bc615012e --- /dev/null +++ b/python_scripts/concat_internals.py @@ -0,0 +1,220 @@ +#!/usr/bin/env python3 +""" +Contains all the common tools for the various concatnation functions/scipts +""" + +import h5py +import argparse +import functools +import pathlib + +# ============================================================================== +def 
destination_safe_open(filename: pathlib.Path) -> h5py.File: + """Opens a HDF5 file safely and provides useful error messages for some common failure modes + + Parameters + ---------- + filename : pathlib.Path + + The full path and name of the file to open : + + filename: pathlib.Path : + + + Returns + ------- + h5py.File + + The opened HDF5 file object + """ + + try: + destination_file = h5py.File(filename, 'w-') + except FileExistsError: + # It might be better for this to simply print the error message and return + # rather than exiting. That way if a single call fails in a parallel + # environment it doesn't take down the entire job + raise FileExistsError(f'File "{filename}" already exists and will not be overwritten, skipping.') + + return destination_file +# ============================================================================== + +# ============================================================================== +def copy_header(source_file: h5py.File, destination_file: h5py.File) -> h5py.File: + """Copy the attributes of one HDF5 file to another, skipping all fields that are specific to an individual rank + + Parameters + ---------- + source_file : h5py.File + The source file + destination_file : h5py.File + The destination file + source_file: h5py.File : + + destination_file: h5py.File : + + + Returns + ------- + h5py.File + The destination file with the new header attributes + """ + fields_to_skip = ['dims_local', 'offset', 'n_particles_local'] + + for attr_key in source_file.attrs.keys(): + if attr_key not in fields_to_skip: + destination_file.attrs[attr_key] = source_file.attrs[attr_key] + + return destination_file +# ============================================================================== + +# ============================================================================== +def common_cli() -> argparse.ArgumentParser: + """This function provides the basis for the common CLI amongst the various concatenation scripts. 
It returns an + `argparse.ArgumentParser` object to which additional arguments can be passed before the final `.parse_args()` method + is used. + + Parameters + ---------- + + Returns + ------- + argparse.ArgumentParser + The common components of the CLI for the concatenation scripts + """ + + # ============================================================================ + def concat_output(raw_argument: str) -> list: + """Function used to parse the `--concat-output` argument + """ + # Check if the string is empty + if len(raw_argument) < 1: + raise ValueError('The --concat-output argument must not be of length zero.') + + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('[', '') + cleaned_argument = cleaned_argument.replace(']', '') + + # Check that it only has the allowed characters + allowed_charaters = set('0123456789,-') + if not set(cleaned_argument).issubset(allowed_charaters): + raise ValueError("Argument contains incorrect characters. 
Should only contain '0-9', ',', and '-'.") + + # Split on commas + cleaned_argument = cleaned_argument.split(',') + + # Generate the final list + iterable_argument = set() + for arg in cleaned_argument: + if '-' not in arg: + if int(arg) < 0: + raise ValueError() + iterable_argument.add(int(arg)) + else: + start, end = arg.split('-') + start, end = int(start), int(end) + if end < start: + raise ValueError('The end of a range must be larger than the start of the range.') + if start < 0: + raise ValueError() + iterable_argument = iterable_argument.union(set(range(start, end+1))) + + return list(iterable_argument) + # ============================================================================ + + # ============================================================================ + def positive_int(raw_argument: str) -> int: + arg = int(raw_argument) + if arg < 0: + raise ValueError('Argument must be 0 or greater.') + + return arg + # ============================================================================ + + # ============================================================================ + def skip_fields(raw_argument: str) -> list: + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('[', '') + cleaned_argument = cleaned_argument.replace(']', '') + cleaned_argument = cleaned_argument.split(',') + + return cleaned_argument + # ============================================================================ + + # ============================================================================ + def chunk_arg(raw_argument: str) -> tuple: + # Strip unneeded characters + cleaned_argument = raw_argument.replace(' ', '') + cleaned_argument = cleaned_argument.replace('(', '') + cleaned_argument = cleaned_argument.replace(')', '') + + # Check that it only has the allowed characters + allowed_charaters = set('0123456789,') + if not set(cleaned_argument).issubset(allowed_charaters): + raise ValueError("Argument 
contains incorrect characters. Should only contain '0-9', ',', and '-'.") + + # Convert to a tuple and return + return tuple([int(i) for i in cleaned_argument.split(',')]) + # ============================================================================ + + # Initialize the CLI + cli = argparse.ArgumentParser() + + # Required Arguments + cli.add_argument('-s', '--source-directory', type=pathlib.Path, required=True, help='The path to the directory for the source HDF5 files.') + cli.add_argument('-o', '--output-directory', type=pathlib.Path, required=True, help='The path to the directory to write out the concatenated HDF5 files.') + cli.add_argument('-n', '--num-processes', type=positive_int, required=True, help='The number of processes that were used') + cli.add_argument('-c', '--concat-outputs', type=concat_output, required=True, help='Which outputs to concatenate. Can be a single number (e.g. 8), a range (e.g. 2-9), or a list (e.g. [1,2,3]). Ranges are inclusive') + + # Optional Arguments + cli.add_argument('--skip-fields', type=skip_fields, default=[], help='List of fields to skip concatenating. Defaults to empty.') + cli.add_argument('--dtype', type=str, default=None, help='The data type of the output datasets. Accepts most numpy types. Defaults to the same as the input datasets.') + cli.add_argument('--compression-type', type=str, default=None, help='What kind of compression to use on the output data. Defaults to None.') + cli.add_argument('--compression-opts', type=str, default=None, help='What compression settings to use if compressing. Defaults to None.') + cli.add_argument('--chunking', type=chunk_arg, default=None, nargs='?', const=True, help='Enable chunking of the output file. Default is `False`. 
If set without an argument then the chunk size will be automatically chosen or a tuple can be passed to indicate the chunk size desired.') + + return cli +# ============================================================================== + +def _get_source_path(proc_id : int, source_directory : pathlib.Path, + pre_extension_suffix : str, nfile : int, new_style : bool, + extension : str = '.h5'): + dirname = str(source_directory) + if new_style: + out = f"{dirname}/{nfile}/{nfile}{pre_extension_suffix}{extension}.{proc_id}" + else: + # in principle, when source_directory isn't an empty string and it doesn't end + # end in a '/', part of it should act like a filename prefix + # -> with that said, the concatenation scripts have not supported this behavior + # since we've made use of pathlib.Path + out = f"{dirname}/{nfile}{pre_extension_suffix}{extension}.{proc_id}" + return pathlib.Path(out) + +def get_source_path_builder(source_directory : pathlib.Path, + pre_extension_suffix : str, + known_output_snap : int): + """ + Source files (that are to be concatenated) have one of 2 formats. This identifies + the format in use and returns a function appropriate for building the pathnames + + This function auto-detect the format and returns a function to construct paths to these + files + """ + + # try newer format first: + common_kw = {'source_directory' : source_directory, 'extension' : '.h5', + 'pre_extension_suffix' : pre_extension_suffix} + new_style_path = _get_source_path(proc_id = 0, nfile = known_output_snap, + new_style = True, **common_kw) + old_style_path = _get_source_path(proc_id = 0, nfile = known_output_snap, + new_style = False, **common_kw) + if new_style_path.is_file(): + return functools.partial(_get_source_path, new_style = True, **common_kw) + elif old_style_path.is_file(): + return functools.partial(_get_source_path, new_style = False, **common_kw) + raise RuntimeError( + "Could not find any files to concatenate. 
We searched " + f"{new_style_path!s} and {old_style_path!s}" + ) \ No newline at end of file diff --git a/python_scripts/concat_particles.py b/python_scripts/concat_particles.py new file mode 100755 index 000000000..89bb3bc1a --- /dev/null +++ b/python_scripts/concat_particles.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +""" +Python script for concatenating particle hdf5 datasets. Includes a CLI for concatenating Cholla HDF5 datasets and can be +imported into other scripts where the `concat_particles_dataset` function can be used to concatenate the datasets. + +Generally the easiest way to import this script is to add the `python_scripts` directory to your python path in your +script like this: +``` +import sys +sys.path.append('/PATH/TO/CHOLLA/python_scripts') +import concat_particles +``` +""" + +import h5py +import numpy as np +import pathlib + +import concat_internals + +# ====================================================================================================================== +def concat_particles_dataset(output_directory: pathlib.Path, + num_processes: int, + output_number: int, + build_source_path, + skip_fields: list = [], + destination_dtype: np.dtype = None, + compression_type: str = None, + compression_options: str = None, + chunking = None) -> None: + """Concatenate a single particle HDF5 Cholla dataset. i.e. take the single + files generated per process and concatenate them into a single, large file. + + Parameters + ---------- + output_directory : pathlib.Path + The directory containing the new concatenated files + num_processes : int + The number of ranks that Cholla was run with + output_number : int + The output number to concatenate + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. + skip_fields : list + List of fields to skip concatenating. Defaults to []. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. 
Defaults to the same as the input datasets. + compression_type : str + What kind of compression to use on the output data. Defaults to None. + compression_options : str + What compression settings to use if compressing. Defaults to None. + chunking : bool or tuple + Whether or not to use chunking and the chunk size. Defaults to None. + output_directory: pathlib.Path : + + num_processes: int : + + output_number: int : + + skip_fields: list : + (Default value = []) + destination_dtype: np.dtype : + (Default value = None) + compression_type: str : + (Default value = None) + compression_options: str : + (Default value = None) + + Returns + ------- + + """ + + # Error checking + assert num_processes > 1, 'num_processes must be greater than 1' + assert output_number >= 0, 'output_number must be greater than or equal to 0' + + # Open the output file for writing + destination_file = concat_internals.destination_safe_open(output_directory / f'{output_number}_particles.h5') + + # Setup the output file + # Note that the call to `__get_num_particles` is potentially expensive as it + # opens every single file to read the number of particles in that file + num_particles = __get_num_particles(build_source_path, num_processes, output_number) + destination_file = __setup_destination_file(build_source_path, + destination_file, + output_number, + num_particles, + skip_fields, + destination_dtype, + compression_type, + compression_options, + chunking) + + # loop over files for a given output + particles_offset = 0 + for i in range(0, num_processes): + # open the input file for reading + source_file = h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') + + # Compute the offset slicing for the 3D data + nx_local, ny_local, nz_local = source_file.attrs['dims_local'] + x_start, y_start, z_start = source_file.attrs['offset'] + x_end, y_end, z_end = x_start+nx_local, y_start+ny_local, z_start+nz_local + + # Get the local number of particles + num_particles_local = 
source_file.attrs['n_particles_local'][0] + + # write data from individual processor file to correct location in concatenated file + for dataset in list(destination_file.keys()): + + if dataset == 'density': + destination_file[dataset][x_start:x_end, + y_start:y_end, + z_start:z_end] = source_file[dataset] + else: + start = particles_offset + end = particles_offset + num_particles_local + destination_file[dataset][start:end] = source_file[dataset] + + # Update the particles offset + particles_offset += num_particles_local + + # Now that the copy is done we close the source file + source_file.close() + + # Close destination file now that it is fully constructed + destination_file.close() +# ============================================================================== + +# ============================================================================== +def __get_num_particles(build_source_path, + num_processes: int, + output_number: int) -> int: + """Get the total number of particles in the output. This function is heavily + I/O bound and might benefit from utilizing threads. + + Parameters + ---------- + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. 
+ num_processes : int + The number of processes + output_number : int + The output number to get data from + + Returns + ------- + int + The total number of particles in the output + """ + # loop over files for a given output + num_particles = 0 + for i in range(0, num_processes): + # open the input file for reading + with h5py.File(build_source_path(proc_id = i, nfile = output_number), 'r') as source_file: + num_particles += source_file.attrs['n_particles_local'] + + return num_particles +# ============================================================================== + +# ============================================================================== +def __setup_destination_file(build_source_path, + destination_file: h5py.File, + output_number: int, + num_particles: int, + skip_fields: list, + destination_dtype: np.dtype, + compression_type: str, + compression_options: str, + chunking) -> h5py.File: + """Setup the destination file by copying the header and setting up the datasets + + Parameters + ---------- + build_source_path : callable + A function used to construct the paths to the files that are to be concatenated. + destination_file : h5py.File + The destination file + output_number : int + The output number to concatenate + num_particles : int + The total number of particles in the output + skip_fields : list + List of fields to skip concatenating. + destination_dtype : np.dtype + The data type of the output datasets. Accepts most numpy types. + compression_type : str + What kind of compression to use on the output data. + compression_options : str + What compression settings to use if compressing. + chunking : _type_ + Whether or not to use chunking and the chunk size. 
+ + Returns + ------- + h5py.File + The fully set up destination file + """ + with h5py.File(build_source_path(proc_id = 0, nfile = output_number), 'r') as source_file: + # Copy header data + destination_file = concat_internals.copy_header(source_file, destination_file) + + # Make list of datasets to copy + datasets_to_copy = list(source_file.keys()) + datasets_to_copy = [dataset for dataset in datasets_to_copy if not dataset in skip_fields] + + # Create the datasets in the output file + for dataset in datasets_to_copy: + dtype = source_file[dataset].dtype if (destination_dtype == None) else destination_dtype + + # Determine the shape of the dataset + if dataset == 'density': + data_shape = source_file.attrs['dims'] + else: + data_shape = num_particles + + # Create the dataset + destination_file.create_dataset(name=dataset, + shape=data_shape, + dtype=dtype, + chunks=chunking, + compression=compression_type, + compression_opts=compression_options) + + return destination_file +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + + cli = concat_internals.common_cli() + args = cli.parse_args() + + build_source_path = concat_internals.get_source_path_builder( + source_directory = args.source_directory, + pre_extension_suffix = f'_particles', + known_output_snap = args.concat_outputs[0]) + + # Perform the concatenation + for output in args.concat_outputs: + concat_particles_dataset(output_directory=args.output_directory, + num_processes=args.num_processes, + output_number=output, + build_source_path = build_source_path, + skip_fields=args.skip_fields, + destination_dtype=args.dtype, + compression_type=args.compression_type, + compression_options=args.compression_opts, + chunking=args.chunking) + + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') diff --git a/python_scripts/dask_distributed_template.py 
b/python_scripts/dask_distributed_template.py new file mode 100755 index 000000000..ac40294b2 --- /dev/null +++ b/python_scripts/dask_distributed_template.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +This is the skeleton for how to run a Dask script on Andes at the OLCF. The CLI +commands required are in the docstring at the top, major Dask steps are in +functions, and `main` is mostly empty with a clear area on where to do your +computations. + +Requirements: - Verified working with Dask v2023.6.0 - Install graphviz for +python + - 'conda install -c conda-forge python-graphviz graphviz' + - Make sure your version of msgpack-python is at least v1.0.5; v1.0.3 had a bug + - `conda install -c conda-forge msgpack-python=1.0.5` + +Notes: +- This is entirely focused on getting Dask to run on Andes, Crusher, and + Frontier. Other systems will likely need similar steps but not identical +- Between each python script the Dask scheduler and workers need to be + restarted. +- "--interface ib0" does not seem to be required but likely does improve + transfer speeds. On Crusher it throws an error, just omit it +- It likes to spit out lots of ugly messages on shutdown that look like + something failed. Odds are that it worked fine and just didn't shutdown + gracefully +- On OLCF systems Dask seems to hang on setup if you use more than 256 + processes. I haven't dug too deeply into it but for now it might be better to + limit jobs to that size and run them longer or run multiple jobs, potentially + an array job +- On OLCF systems it doesn't always end the job properly and the job will just + keep running and do nothing. Either set short walltimes so it times out or + just keep an eye on it. 
Maybe end with the script sending an exit command + +################################################################################ +#!/usr/bin/env bash + +#SBATCH -A +#SBATCH -J +#SBATCH -o /%x-%j.out +#SBATCH -t 04:00:00 +#SBATCH -p batch +#SBATCH -N 32 +#SBATCH --mail-user= #SBATCH --mail-type=ALL + +# Setup some parameters DASK_SCHEDULE_FILE=$(pwd)/dask_schedule_file.json +DASK_NUM_WORKERS=$((SLURM_JOB_NUM_NODES*8)) + +# Add any scripts that you're importing to the PYTHONPATH, even ones in the same +# directory. The worker tasks have their own directories and won't find any of +# your scripts unless they're in the PYTHONPATH +export PYTHONPATH="${PYTHONPATH}:/your/path/here" + +INTERFACE='--interface ib0' # For Andes +# INTERFACE='' # For Crusher + +srun --exclusive --ntasks=1 dask scheduler $INTERFACE --scheduler-file $DASK_SCHEDULE_FILE --no-dashboard --no-show & + +# Wait for the dask-scheduler to start +sleep 30 + +srun --exclusive --ntasks=$DASK_NUM_WORKERS dask worker --scheduler-file $DASK_SCHEDULE_FILE --memory-limit='auto' --worker-class distributed.Worker $INTERFACE --no-dashboard --local-directory & + +# Wait for workers to start +sleep 10 + +python -u ./dask_distributed_template.py --scheduler-file $DASK_SCHEDULE_FILE --num-workers $DASK_NUM_WORKERS + +wait +################################################################################ +""" + +import dask +import dask.array as da +import dask.dataframe as dd +from dask.distributed import Client +from dask import graph_manipulation + +import pathlib +import argparse + +# ============================================================================== +def main(): + # Get command line arguments + cli = argparse.ArgumentParser() + # Required Arguments + cli.add_argument('-N', '--num-workers', type=int, required=True, help='The number of workers to use') + cli.add_argument('-s', '--scheduler-file', type=pathlib.Path, required=True, help='The path to the scheduler file') + # Optional Arguments + # none 
yet, feel free to add your own + args = cli.parse_args() + + # Setup the Dask cluster + client = startup_dask(args.scheduler_file, args.num_workers) + + # Perform your computation + # ... + # ... + # ... + # Some suggestions: + # - If you're using Delayed then append all tasks to a list and execute them with `dask.compute(*command_list)` + # - Visualize task tree with `dask.visualize(*command_list, filename=str('filename.pdf')) + # - Add dependencies manually with `dask.graph_manipulation.bind(dependent_task, list_of_dependencies)` + # End of Computation + + # Shutdown the Dask cluster + shutdown_dask(client) +# ============================================================================== + +# ============================================================================== +def startup_dask(scheduler_file, num_workers): + # Connect to the dask-cluster + client = Client(scheduler_file=scheduler_file) + print('client information ', client) + + # Block until num_workers are ready + print(f'Waiting for {num_workers} workers...') + client.wait_for_workers(n_workers=num_workers) + + num_connected_workers = len(client.scheduler_info()['workers']) + print(f'{num_connected_workers} workers connected') + + return client +# ============================================================================== + +# ============================================================================== +def shutdown_dask(client): + print('Shutting down the cluster') + workers_list = list(client.scheduler_info()['workers']) + client.retire_workers(workers_list, close_workers=True) + client.shutdown() +# ============================================================================== + +if __name__ == '__main__': + main() diff --git a/python_scripts/dask_single_machine_template.py b/python_scripts/dask_single_machine_template.py new file mode 100755 index 000000000..7816ec791 --- /dev/null +++ b/python_scripts/dask_single_machine_template.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" 
+================================================================================ + Written by Robert Caddy. + + A simple template for Dask scripts running on a single machine +================================================================================ +""" + +import dask +import dask.array as da +import dask.dataframe as dd +from dask import graph_manipulation + +import argparse +import pathlib + +# ============================================================================== +def main(): + cli = argparse.ArgumentParser() + # Required Arguments + # Optional Arguments + cli.add_argument('-n', '--num-workers', type=int, default=8, help='The number of workers to use.') + args = cli.parse_args() + + # Set scheduler type. Options are 'threads', 'processes', 'single-threaded', and 'distributed'. + # - 'threads' uses threads that share memory, often fastest on single machines, can run into issues with the GIL + # - 'processes' uses multiple processes that do not share memory, can be used to get around issues with the GIL + # - `single-threaded` is great for debugging + dask.config.set(scheduler='processes', num_workers=args.num_workers) + + # Perform your computation + # ... + # ... + # ... 
+ # Some suggestions: + # - If you're using Delayed then append all tasks to a list and execute them with `dask.compute(*command_list)` + # - Visualize task tree with `dask.visualize(*command_list, filename=str('filename.pdf')) + # - Add dependencies manually with `dask.graph_manipulation.bind(dependent_task, list_of_dependencies)` + # End of Computation +# ============================================================================== + +if __name__ == '__main__': + from timeit import default_timer + start = default_timer() + main() + print(f'\nTime to execute: {round(default_timer()-start,2)} seconds') diff --git a/src/analysis/analysis.cpp b/src/analysis/analysis.cpp index af147e776..d9eede2f1 100644 --- a/src/analysis/analysis.cpp +++ b/src/analysis/analysis.cpp @@ -1,71 +1,68 @@ #ifdef ANALYSIS -#include -#include "../analysis/analysis.h" -#include "../io/io.h" + #include "../analysis/analysis.h" + #include -Analysis_Module::Analysis_Module( void ){} + #include "../io/io.h" -#ifdef LYA_STATISTICS -void Grid3D::Compute_Lya_Statistics( ){ +AnalysisModule::AnalysisModule(void) {} + #ifdef LYA_STATISTICS +void Grid3D::Compute_Lya_Statistics() +{ int axis, n_skewers; Real time_start, time_end, time_elapsed; - time_start = get_time(); - + time_start = Get_Time(); + // Copmpute Lya Statitics - chprintf( "Computing Lya Absorbiton along skewers \n"); - for ( axis=0; axis<3; axis++ ){ - - if ( axis == 0 ) n_skewers = Analysis.n_skewers_local_x; - if ( axis == 1 ) n_skewers = Analysis.n_skewers_local_y; - if ( axis == 2 ) n_skewers = Analysis.n_skewers_local_z; - - if ( axis == 0 ) chprintf( " Computing Along X axis: "); - if ( axis == 1 ) chprintf( " Computing Along Y axis: "); - if ( axis == 2 ) chprintf( " Computing Along Z axis: "); - - - Populate_Lya_Skewers_Local( axis ); - Analysis.Initialize_Lya_Statistics_Measurements( axis ); - Analysis.Transfer_Skewers_Data( axis ); - - for ( int skewer_id=0; skewer_id< n_skewers; skewer_id++ ){ - 
Compute_Transmitted_Flux_Skewer( skewer_id, axis ); - Analysis.Compute_Lya_Mean_Flux_Skewer( skewer_id, axis ); + chprintf("Computing Lya Absorbiton along skewers \n"); + for (axis = 0; axis < 3; axis++) { + if (axis == 0) n_skewers = Analysis.n_skewers_local_x; + if (axis == 1) n_skewers = Analysis.n_skewers_local_y; + if (axis == 2) n_skewers = Analysis.n_skewers_local_z; + + if (axis == 0) chprintf(" Computing Along X axis: "); + if (axis == 1) chprintf(" Computing Along Y axis: "); + if (axis == 2) chprintf(" Computing Along Z axis: "); + + Populate_Lya_Skewers_Local(axis); + Analysis.Initialize_Lya_Statistics_Measurements(axis); + Analysis.Transfer_Skewers_Data(axis); + + for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) { + Compute_Transmitted_Flux_Skewer(skewer_id, axis); + Analysis.Compute_Lya_Mean_Flux_Skewer(skewer_id, axis); } - Analysis.Reduce_Lya_Mean_Flux_Axis( axis ); - + Analysis.Reduce_Lya_Mean_Flux_Axis(axis); + #ifdef OUTPUT_SKEWERS - Analysis.Transfer_Skewers_Global_Axis( axis ); + Analysis.Transfer_Skewers_Global_Axis(axis); #endif - - } + } Analysis.Reduce_Lya_Mean_Flux_Global(); // if( Analysis.Flux_mean_HI > 1e-10 ){ - - // Compute the Flux Power Spectrum after computing the mean transmitted flux - for ( axis=0; axis<3; axis++ ){ - if ( axis == 0 ) n_skewers = Analysis.n_skewers_local_x; - if ( axis == 1 ) n_skewers = Analysis.n_skewers_local_y; - if ( axis == 2 ) n_skewers = Analysis.n_skewers_local_z; + // Compute the Flux Power Spectrum after computing the mean transmitted flux + for (axis = 0; axis < 3; axis++) { + if (axis == 0) n_skewers = Analysis.n_skewers_local_x; + if (axis == 1) n_skewers = Analysis.n_skewers_local_y; + if (axis == 2) n_skewers = Analysis.n_skewers_local_z; - if ( axis == 0 ) chprintf( " Computing P(k) Along X axis\n"); - if ( axis == 1 ) chprintf( " Computing P(k) Along Y axis\n"); - if ( axis == 2 ) chprintf( " Computing P(k) Along Z axis\n"); + if (axis == 0) chprintf(" Computing P(k) Along X 
axis\n"); + if (axis == 1) chprintf(" Computing P(k) Along Y axis\n"); + if (axis == 2) chprintf(" Computing P(k) Along Z axis\n"); - Initialize_Power_Spectrum_Measurements( axis ); + Initialize_Power_Spectrum_Measurements(axis); - for ( int skewer_id=0; skewer_id< n_skewers; skewer_id++ ){ - Compute_Flux_Power_Spectrum_Skewer( skewer_id, axis ); + for (int skewer_id = 0; skewer_id < n_skewers; skewer_id++) { + Compute_Flux_Power_Spectrum_Skewer(skewer_id, axis); } - - Analysis.Reduce_Power_Spectrum_Axis( axis ); + + Analysis.Reduce_Power_Spectrum_Axis(axis); } - + Analysis.Reduce_Power_Spectrum_Global(); Analysis.Computed_Flux_Power_Spectrum = 1; @@ -73,27 +70,26 @@ void Grid3D::Compute_Lya_Statistics( ){ // Analysis.Computed_Flux_Power_Spectrum = 0; // } - time_end = get_time(); - time_elapsed = (time_end - time_start)*1000; - chprintf( "Analysis Time: %f9.1 ms \n", time_elapsed ); + time_end = Get_Time(); + time_elapsed = (time_end - time_start) * 1000; + chprintf("Analysis Time: %f9.1 ms \n", time_elapsed); } -#endif //LYA_STATISTICS - - -void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ + #endif // LYA_STATISTICS +void Grid3D::Compute_and_Output_Analysis(struct Parameters *P) +{ #ifdef COSMOLOGY - chprintf("\nComputing Analysis current_z: %f\n", Analysis.current_z ); - #else + chprintf("\nComputing Analysis current_z: %f\n", Analysis.current_z); + #else chprintf("\nComputing Analysis \n"); #endif - - cudaMemcpy( C.density, C.device, H.n_fields*H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); + + cudaMemcpy(C.density, C.device, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost); #ifdef PHASE_DIAGRAM - #ifdef CHEMISTRY_GPU - Compute_Gas_Temperature( Chem.Fields.temperature_h, true ); - #endif + #ifdef CHEMISTRY_GPU + Compute_Gas_Temperature(Chem.Fields.temperature_h, true); + #endif Compute_Phase_Diagram(); #endif @@ -101,40 +97,39 @@ void Grid3D::Compute_and_Output_Analysis( struct parameters *P ){ Compute_Lya_Statistics(); #endif - 
//Write to HDF5 file - #ifdef MPI_CHOLLA - if ( procID == 0 ) Output_Analysis(P); - #else + // Write to HDF5 file + #if defined(COSMOLOGY) || defined(PHASE_DIAGRAM) || defined(LYA_STATISTICS) + #ifdef MPI_CHOLLA + if (procID == 0) Output_Analysis(P); + #else Output_Analysis(P); + #endif #endif - #ifdef LYA_STATISTICS if (Analysis.Computed_Flux_Power_Spectrum == 1) Analysis.Clear_Power_Spectrum_Measurements(); #endif #ifdef COSMOLOGY Analysis.Set_Next_Scale_Output(); - #endif + #endif Analysis.Output_Now = false; - // exit(0); } +void Grid3D::Initialize_AnalysisModule(struct Parameters *P) +{ + chprintf("\nInitializng Analysis Module...\n"); - -void Grid3D::Initialize_Analysis_Module( struct parameters *P ){ - - chprintf( "\nInitializng Analysis Module...\n"); - #ifndef MPI_CHOLLA - chprintf( "The Analysys Module is implemented for the MPI version only... sorry!\n "); + chprintf( + "The Analysys Module is implemented for the MPI version only... " + "sorry!\n "); exit(-1); #endif - - + Real z_now; #ifdef COSMOLOGY z_now = Cosmo.current_z; @@ -142,41 +137,43 @@ void Grid3D::Initialize_Analysis_Module( struct parameters *P ){ z_now = 0; #endif - Analysis.Initialize( H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, H.zblocal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P ); - + Analysis.Initialize(H.xdglobal, H.ydglobal, H.zdglobal, H.xblocal, H.yblocal, H.zblocal, P->nx, P->ny, P->nz, + H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost, z_now, P); } -void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, struct parameters *P ){ - - //Domain Length +void AnalysisModule::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, + int nx_real, int ny_real, int nz_real, Real dx_real, Real 
dy_real, Real dz_real, + int n_ghost_hydro, Real z_now, struct Parameters *P) +{ + // Domain Length Lbox_x = Lx; Lbox_y = Ly; Lbox_z = Lz; - //Left Boundaries of Local domain + // Left Boundaries of Local domain xMin = x_min; yMin = y_min; zMin = z_min; - //Cell sizes + // Cell sizes dx = dx_real; dy = dy_real; dz = dz_real; - //Size of Global Domain + // Size of Global Domain nx_total = nx; ny_total = ny; nz_total = nz; - //Size of Local Domain + // Size of Local Domain nx_local = nx_real; ny_local = ny_real; nz_local = nz_real; - //Number of ghost cells in the conserved arrays + // Number of ghost cells in the conserved arrays n_ghost = n_ghost_hydro; - //Domain Global left Boundary + // Domain Global left Boundary xMin_global = P->xmin; yMin_global = P->ymin; zMin_global = P->zmin; @@ -184,7 +181,7 @@ void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_ #ifdef COSMOLOGY current_z = z_now; - //Load values of scale factor for analysis outputs + // Load values of scale factor for analysis outputs Load_Scale_Outputs(P); #endif @@ -196,139 +193,128 @@ void Analysis_Module::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_ Initialize_Lya_Statistics(P); #endif - chprintf( "Analysis Module Successfully Initialized.\n\n"); - - + chprintf("Analysis Module Successfully Initialized.\n\n"); } - - - - -void Analysis_Module::Reset(){ - +void AnalysisModule::Reset() +{ #ifdef PHASE_DIAGRAM free(phase_diagram); #endif #ifdef LYA_STATISTICS - free( skewers_HI_density_local_x ); - free( skewers_HI_density_local_y ); - free( skewers_HI_density_local_z ); - free( skewers_HeII_density_local_x ); - free( skewers_HeII_density_local_y ); - free( skewers_HeII_density_local_z ); - free( skewers_velocity_local_x ); - free( skewers_velocity_local_y ); - free( skewers_velocity_local_z ); - free( skewers_temperature_local_x ); - free( skewers_temperature_local_y ); - free( skewers_temperature_local_z ); - #ifdef OUTPUT_SKEWERS - free( 
skewers_density_local_x ); - free( skewers_density_local_y ); - free( skewers_density_local_z ); - #endif - - #ifdef MPI_CHOLLA - - if ( procID == 0 ){ - free( root_procs_x ); - free( root_procs_y ); - free( root_procs_z ); + free(skewers_HI_density_local_x); + free(skewers_HI_density_local_y); + free(skewers_HI_density_local_z); + free(skewers_HeII_density_local_x); + free(skewers_HeII_density_local_y); + free(skewers_HeII_density_local_z); + free(skewers_velocity_local_x); + free(skewers_velocity_local_y); + free(skewers_velocity_local_z); + free(skewers_temperature_local_x); + free(skewers_temperature_local_y); + free(skewers_temperature_local_z); #ifdef OUTPUT_SKEWERS - free( transfer_buffer_root_x ); - free( transfer_buffer_root_y ); - free( transfer_buffer_root_z ); - free( skewers_transmitted_flux_HI_x_global ); - free( skewers_transmitted_flux_HI_y_global ); - free( skewers_transmitted_flux_HI_z_global ); - free( skewers_transmitted_flux_HeII_x_global ); - free( skewers_transmitted_flux_HeII_y_global ); - free( skewers_transmitted_flux_HeII_z_global ); - free( skewers_density_x_global ); - free( skewers_density_y_global ); - free( skewers_density_z_global ); - free( skewers_HI_density_x_global ); - free( skewers_HI_density_y_global ); - free( skewers_HI_density_z_global ); - free( skewers_HeII_density_x_global ); - free( skewers_HeII_density_y_global ); - free( skewers_HeII_density_z_global ); - free( skewers_temperature_x_global ); - free( skewers_temperature_y_global ); - free( skewers_temperature_z_global ); - free( skewers_los_velocity_x_global ); - free( skewers_los_velocity_y_global ); - free( skewers_los_velocity_z_global ); - - #endif - } - - if ( am_I_root_x ){ - free( skewers_HI_density_root_x ); - free( skewers_HeII_density_root_x ); - free( skewers_velocity_root_x ); - free( skewers_temperature_root_x ); - free( full_HI_density_x ); - free( full_HeII_density_x ); - free( full_velocity_x ); - free( full_temperature_x ); - free( 
full_optical_depth_HI_x ); - free( full_optical_depth_HeII_x ); - free( full_vel_Hubble_x ); - free( skewers_transmitted_flux_HI_x ); - free( skewers_transmitted_flux_HeII_x ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_root_x ); + free(skewers_density_local_x); + free(skewers_density_local_y); + free(skewers_density_local_z); #endif + + #ifdef MPI_CHOLLA + + if (procID == 0) { + free(root_procs_x); + free(root_procs_y); + free(root_procs_z); + #ifdef OUTPUT_SKEWERS + free(transfer_buffer_root_x); + free(transfer_buffer_root_y); + free(transfer_buffer_root_z); + free(skewers_transmitted_flux_HI_x_global); + free(skewers_transmitted_flux_HI_y_global); + free(skewers_transmitted_flux_HI_z_global); + free(skewers_transmitted_flux_HeII_x_global); + free(skewers_transmitted_flux_HeII_y_global); + free(skewers_transmitted_flux_HeII_z_global); + free(skewers_density_x_global); + free(skewers_density_y_global); + free(skewers_density_z_global); + free(skewers_HI_density_x_global); + free(skewers_HI_density_y_global); + free(skewers_HI_density_z_global); + free(skewers_HeII_density_x_global); + free(skewers_HeII_density_y_global); + free(skewers_HeII_density_z_global); + free(skewers_temperature_x_global); + free(skewers_temperature_y_global); + free(skewers_temperature_z_global); + free(skewers_los_velocity_x_global); + free(skewers_los_velocity_y_global); + free(skewers_los_velocity_z_global); + + #endif } - if ( am_I_root_y ){ - free( skewers_HI_density_root_y ); - free( skewers_HeII_density_root_y ); - free( skewers_velocity_root_y ); - free( skewers_temperature_root_y ); - free( full_HI_density_y ); - free( full_HeII_density_y ); - free( full_velocity_y ); - free( full_temperature_y ); - free( full_optical_depth_HI_y ); - free( full_optical_depth_HeII_y ); - free( full_vel_Hubble_y ); - free( skewers_transmitted_flux_HI_y ); - free( skewers_transmitted_flux_HeII_y ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_root_y ); - #endif + if (am_I_root_x) { + 
free(skewers_HI_density_root_x); + free(skewers_HeII_density_root_x); + free(skewers_velocity_root_x); + free(skewers_temperature_root_x); + free(full_HI_density_x); + free(full_HeII_density_x); + free(full_velocity_x); + free(full_temperature_x); + free(full_optical_depth_HI_x); + free(full_optical_depth_HeII_x); + free(full_vel_Hubble_x); + free(skewers_transmitted_flux_HI_x); + free(skewers_transmitted_flux_HeII_x); + #ifdef OUTPUT_SKEWERS + free(skewers_density_root_x); + #endif } - if ( am_I_root_z ){ - free( skewers_HI_density_root_z ); - free( skewers_HeII_density_root_z ); - free( skewers_velocity_root_z ); - free( skewers_temperature_root_z ); - free( full_HI_density_z ); - free( full_HeII_density_z ); - free( full_velocity_z ); - free( full_temperature_z ); - free( full_optical_depth_HI_z ); - free( full_optical_depth_HeII_z ); - free( full_vel_Hubble_z ); - free( skewers_transmitted_flux_HI_z ); - free( skewers_transmitted_flux_HeII_z ); - #ifdef OUTPUT_SKEWERS - free( skewers_density_root_z ); - #endif + if (am_I_root_y) { + free(skewers_HI_density_root_y); + free(skewers_HeII_density_root_y); + free(skewers_velocity_root_y); + free(skewers_temperature_root_y); + free(full_HI_density_y); + free(full_HeII_density_y); + free(full_velocity_y); + free(full_temperature_y); + free(full_optical_depth_HI_y); + free(full_optical_depth_HeII_y); + free(full_vel_Hubble_y); + free(skewers_transmitted_flux_HI_y); + free(skewers_transmitted_flux_HeII_y); + #ifdef OUTPUT_SKEWERS + free(skewers_density_root_y); + #endif } + if (am_I_root_z) { + free(skewers_HI_density_root_z); + free(skewers_HeII_density_root_z); + free(skewers_velocity_root_z); + free(skewers_temperature_root_z); + free(full_HI_density_z); + free(full_HeII_density_z); + free(full_velocity_z); + free(full_temperature_z); + free(full_optical_depth_HI_z); + free(full_optical_depth_HeII_z); + free(full_vel_Hubble_z); + free(skewers_transmitted_flux_HI_z); + free(skewers_transmitted_flux_HeII_z); + #ifdef 
OUTPUT_SKEWERS + free(skewers_density_root_z); + #endif + } + #endif #endif - #endif - - } - - #endif diff --git a/src/analysis/analysis.h b/src/analysis/analysis.h index 096d6b6bd..59ccb050a 100644 --- a/src/analysis/analysis.h +++ b/src/analysis/analysis.h @@ -1,20 +1,19 @@ #ifdef ANALYSIS -#ifndef ANALYSIS_H -#define ANALYSIS_H + #ifndef ANALYSIS_H + #define ANALYSIS_H -#include "../global/global.h" -#include + #include -#ifdef LYA_STATISTICS -#include -#endif - -using namespace std; + #include "../global/global.h" -class Analysis_Module{ -public: + #ifdef LYA_STATISTICS + #include + #endif +class AnalysisModule +{ + public: Real Lbox_x; Real Lbox_y; Real Lbox_z; @@ -47,13 +46,11 @@ class Analysis_Module{ bool Output_Now; int n_file; - #ifdef COSMOLOGY + #ifdef COSMOLOGY Real current_z; - #endif + #endif - - - #ifdef PHASE_DIAGRAM + #ifdef PHASE_DIAGRAM int n_dens; int n_temp; Real temp_min; @@ -61,13 +58,12 @@ class Analysis_Module{ Real dens_min; Real dens_max; float *phase_diagram; - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA float *phase_diagram_global; - #endif - #endif - + #endif + #endif - #ifdef LYA_STATISTICS + #ifdef LYA_STATISTICS int Computed_Flux_Power_Spectrum; int n_stride; int n_skewers_local_x; @@ -128,7 +124,6 @@ class Analysis_Module{ Real *full_HI_density_y; Real *full_HI_density_z; - Real *full_HeII_density_x; Real *full_HeII_density_y; Real *full_HeII_density_z; @@ -140,91 +135,89 @@ class Analysis_Module{ Real *full_temperature_x; Real *full_temperature_y; Real *full_temperature_z; - + Real *full_optical_depth_HI_x; Real *full_optical_depth_HI_y; Real *full_optical_depth_HI_z; - + Real *full_optical_depth_HeII_x; Real *full_optical_depth_HeII_y; Real *full_optical_depth_HeII_z; - + Real *full_vel_Hubble_x; Real *full_vel_Hubble_y; Real *full_vel_Hubble_z; - + Real *skewers_transmitted_flux_HI_x; Real *skewers_transmitted_flux_HI_y; Real *skewers_transmitted_flux_HI_z; - + Real *skewers_transmitted_flux_HeII_x; Real 
*skewers_transmitted_flux_HeII_y; Real *skewers_transmitted_flux_HeII_z; - - #ifdef OUTPUT_SKEWERS - + + #ifdef OUTPUT_SKEWERS + Real *skewers_density_local_x; Real *skewers_density_local_y; Real *skewers_density_local_z; - + Real *skewers_density_root_x; Real *skewers_density_root_y; Real *skewers_density_root_z; - + Real *skewers_density_x_global; Real *skewers_density_y_global; Real *skewers_density_z_global; - - + Real *skewers_HI_density_x_global; Real *skewers_HI_density_y_global; Real *skewers_HI_density_z_global; - + Real *skewers_HeII_density_x_global; Real *skewers_HeII_density_y_global; Real *skewers_HeII_density_z_global; - + Real *skewers_temperature_x_global; Real *skewers_temperature_y_global; Real *skewers_temperature_z_global; - + Real *skewers_los_velocity_x_global; Real *skewers_los_velocity_y_global; Real *skewers_los_velocity_z_global; - + Real *skewers_transmitted_flux_HI_x_global; Real *skewers_transmitted_flux_HI_y_global; Real *skewers_transmitted_flux_HI_z_global; - + Real *skewers_transmitted_flux_HeII_x_global; Real *skewers_transmitted_flux_HeII_y_global; Real *skewers_transmitted_flux_HeII_z_global; - + Real *transfer_buffer_root_x; Real *transfer_buffer_root_y; Real *transfer_buffer_root_z; - #endif - + #endif + Real Flux_mean_root_HI_x; Real Flux_mean_root_HI_y; Real Flux_mean_root_HI_z; - + Real Flux_mean_root_HeII_x; Real Flux_mean_root_HeII_y; Real Flux_mean_root_HeII_z; - + Real Flux_mean_HI_x; Real Flux_mean_HI_y; Real Flux_mean_HI_z; - + Real Flux_mean_HeII_x; Real Flux_mean_HeII_y; Real Flux_mean_HeII_z; - + Real Flux_mean_HI; Real Flux_mean_HeII; - int n_skewers_processed; int n_ghost_skewer; @@ -281,51 +274,48 @@ class Analysis_Module{ Real *ps_global_z; Real *ps_mean; Real *k_centers; - + bool *root_procs_x; bool *root_procs_y; - bool *root_procs_z; - - #ifdef MPI_CHOLLA + bool *root_procs_z; + + #ifdef MPI_CHOLLA Real *mpi_domain_boundary_x; Real *mpi_domain_boundary_y; Real *mpi_domain_boundary_z; vector mpi_indices_x; 
vector mpi_indices_y; vector mpi_indices_z; - #endif + #endif - #endif + #endif - - Analysis_Module( void ); - void Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, struct parameters *P ); + AnalysisModule(void); + void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, + int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_hydro, Real z_now, + struct Parameters *P); void Reset(void); - void Load_Scale_Outputs( struct parameters *P ); - void Set_Next_Scale_Output( ); - - - - #ifdef PHASE_DIAGRAM - void Initialize_Phase_Diagram( struct parameters *P ); - #endif - - #ifdef LYA_STATISTICS - void Initialize_Lya_Statistics( struct parameters *P ); - void Initialize_Lya_Statistics_Measurements( int axis ); - void Transfer_Skewers_Data( int axis ); - void Compute_Lya_Mean_Flux_Skewer( int skewer_id, int axis ); - void Reduce_Lya_Mean_Flux_Axis( int axis ); - void Reduce_Lya_Mean_Flux_Global( ); - void Clear_Power_Spectrum_Measurements( void ); - void Reduce_Power_Spectrum_Axis( int axis ); - void Reduce_Power_Spectrum_Global( ); - void Transfer_Skewers_Global_Axis( int axis ); - #endif + void Load_Scale_Outputs(struct Parameters *P); + void Set_Next_Scale_Output(); + + #ifdef PHASE_DIAGRAM + void Initialize_Phase_Diagram(struct Parameters *P); + #endif + + #ifdef LYA_STATISTICS + void Initialize_Lya_Statistics(struct Parameters *P); + void Initialize_Lya_Statistics_Measurements(int axis); + void Transfer_Skewers_Data(int axis); + void Compute_Lya_Mean_Flux_Skewer(int skewer_id, int axis); + void Reduce_Lya_Mean_Flux_Axis(int axis); + void Reduce_Lya_Mean_Flux_Global(); + void Clear_Power_Spectrum_Measurements(void); + void Reduce_Power_Spectrum_Axis(int axis); + void Reduce_Power_Spectrum_Global(); + void 
Transfer_Skewers_Global_Axis(int axis); + #endif }; - - -#endif + #endif #endif diff --git a/src/analysis/feedback_analysis.cpp b/src/analysis/feedback_analysis.cpp new file mode 100644 index 000000000..3dab7b6da --- /dev/null +++ b/src/analysis/feedback_analysis.cpp @@ -0,0 +1,143 @@ +#include "feedback_analysis.h" + +#include "../io/io.h" +#include "../model/disk_galaxy.h" + +#ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" +#endif + +#define VRMS_CUTOFF_DENSITY (0.01 * 0.6 * MP / DENSITY_UNIT) + +FeedbackAnalysis::FeedbackAnalysis(Grid3D& G) +{ + // allocate arrays + h_circ_vel_x = (Real*)malloc(G.H.n_cells * sizeof(Real)); + h_circ_vel_y = (Real*)malloc(G.H.n_cells * sizeof(Real)); + +#ifdef PARTICLES_GPU + GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_x, G.H.n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void**)&d_circ_vel_y, G.H.n_cells * sizeof(Real))); +#endif + + // setup the (constant) circular speed arrays + int id; + Real vca, r, x, y, z; + + for (int k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { + for (int j = G.H.n_ghost; j < G.H.ny - G.H.n_ghost; j++) { + for (int i = G.H.n_ghost; i < G.H.nx - G.H.n_ghost; i++) { + id = i + j * G.H.nx + k * G.H.nx * G.H.ny; + + G.Get_Position(i, j, k, &x, &y, &z); + r = sqrt(x * x + y * y); + + vca = sqrt(r * fabs(galaxies::MW.gr_total_D3D(r, z))); + h_circ_vel_x[id] = -y / r * vca; + h_circ_vel_y[id] = x / r * vca; + } + } + } + +#ifdef PARTICLES_GPU + GPU_Error_Check(cudaMemcpy(d_circ_vel_x, h_circ_vel_x, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(d_circ_vel_y, h_circ_vel_y, G.H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); +#endif +} + +FeedbackAnalysis::~FeedbackAnalysis() +{ + free(h_circ_vel_x); + free(h_circ_vel_y); +#ifdef PARTICLES_GPU + GPU_Error_Check(cudaFree(d_circ_vel_x)); + GPU_Error_Check(cudaFree(d_circ_vel_y)); +#endif +} + +void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion(Grid3D& G) +{ +#ifdef CPU_TIME + 
G.Timer.FeedbackAnalysis.Start(); +#endif + +#ifdef PARTICLES_CPU + int i, j, k, id, idm, idp; + int id_grav; + Real x, y, z, r, xpm, xpp, ypm, ypp, zpm, zpp; + Real Pm, Pp; + Real dPdx, dPdy, dPdr; + Real vx, vy, vz, vrms_poisson, vrms_analytic, vcp, vca, vcxp, vcyp, vcxa, vcya; + Real total_mass, partial_mass = 0, total_var_analytic = 0, total_var_poisson = 0, partial_var_poisson = 0, + partial_var_analytic = 0; + + int n_ghost_grav = G.Particles.G.n_ghost_particles_grid; + int ghost_diff = n_ghost_grav - G.H.n_ghost; + int nx_grav = G.Particles.G.nx_local + 2 * n_ghost_grav; + int ny_grav = G.Particles.G.ny_local + 2 * n_ghost_grav; + + for (k = 0; k < G.H.nz_real; k++) { + for (j = 0; j < G.H.ny_real; j++) { + for (i = 0; i < G.H.nx_real; i++) { + id = (i + G.H.n_ghost) + (j + G.H.n_ghost) * G.H.nx + (k + G.H.n_ghost) * G.H.nx * G.H.ny; + partial_mass += G.C.density[id]; + } + } + } + #ifdef MPI_CHOLLA + MPI_Allreduce(&partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); + #else + total_mass = partial_mass; + #endif + + for (k = G.H.n_ghost; k < G.H.nz - G.H.n_ghost; k++) { + for (j = G.H.n_ghost; j < G.H.ny - G.H.n_ghost; j++) { + for (i = G.H.n_ghost; i < G.H.nx - G.H.n_ghost; i++) { + id = i + j * G.H.nx + k * G.H.nx * G.H.ny; + id_grav = (i + ghost_diff) + (j + ghost_diff) * nx_grav + (k + ghost_diff) * nx_grav * ny_grav; + + if (G.C.density[id] < VRMS_CUTOFF_DENSITY) continue; // in cgs, this is 0.01 cm^{-3} + + G.Get_Position(i, j, k, &x, &y, &z); + r = sqrt(x * x + y * y); + + vcp = sqrt(r * fabs(G.Particles.G.gravity_x[id_grav] * x / r + G.Particles.G.gravity_y[id_grav] * y / r)); + vcxp = -y / r * vcp; + vcyp = x / r * vcp; + vx = G.C.momentum_x[id] / G.C.density[id]; + vy = G.C.momentum_y[id] / G.C.density[id]; + vz = G.C.momentum_z[id] / G.C.density[id]; + + partial_var_poisson += ((vx - vcxp) * (vx - vcxp) + (vy - vcyp) * (vy - vcyp) + vz * vz) * G.C.density[id]; + partial_var_analytic += ((vx - h_circ_vel_x[id]) * (vx - h_circ_vel_x[id]) + + 
(vy - h_circ_vel_y[id]) * (vy - h_circ_vel_y[id]) + (vz * vz)) * + G.C.density[id]; + } + } + } + partial_var_poisson /= total_mass; + partial_var_analytic /= total_mass; + + #ifdef MPI_CHOLLA + MPI_Reduce(&partial_var_poisson, &total_var_poisson, 1, MPI_CHREAL, MPI_SUM, root, world); + MPI_Reduce(&partial_var_analytic, &total_var_analytic, 1, MPI_CHREAL, MPI_SUM, root, world); + + #else + total_var_poisson = partial_var_poisson; + total_var_analytic = partial_var_analytic; + #endif + + vrms_poisson = sqrt(total_var_poisson) * VELOCITY_UNIT / 1e5; // output in km/s + vrms_analytic = sqrt(total_var_analytic) * VELOCITY_UNIT / 1e5; + + chprintf("feedback: time %f, dt=%f, vrms_p = %f km/s, vrms_a = %f km/s\n", G.H.t, G.H.dt, vrms_poisson, + vrms_analytic); + +#elif defined(PARTICLES_GPU) + Compute_Gas_Velocity_Dispersion_GPU(G); +#endif // PARTICLES_CPU + +#ifdef CPU_TIME + G.Timer.FeedbackAnalysis.End(); +#endif +} diff --git a/src/analysis/feedback_analysis.h b/src/analysis/feedback_analysis.h new file mode 100644 index 000000000..9b29420f4 --- /dev/null +++ b/src/analysis/feedback_analysis.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +#include "../global/global.h" +#include "../grid/grid3D.h" + +class FeedbackAnalysis +{ + Real *h_circ_vel_x, *h_circ_vel_y; + +#ifdef PARTICLES_GPU + Real *d_circ_vel_x, *d_circ_vel_y; + void Compute_Gas_Velocity_Dispersion_GPU(Grid3D& G); +#endif + + public: + int countSN{0}; + int countResolved{0}; + int countUnresolved{0}; + Real totalEnergy{0}; + Real totalMomentum{0}; + Real totalUnresEnergy{0}; + + FeedbackAnalysis(Grid3D& G); + ~FeedbackAnalysis(); + + void Compute_Gas_Velocity_Dispersion(Grid3D& G); + void Reset(); +}; \ No newline at end of file diff --git a/src/analysis/feedback_analysis_gpu.cu b/src/analysis/feedback_analysis_gpu.cu new file mode 100644 index 000000000..11132bece --- /dev/null +++ b/src/analysis/feedback_analysis_gpu.cu @@ -0,0 +1,205 @@ + + +#include + +#include "../io/io.h" +#include 
"feedback_analysis.h" +#ifdef PARTICLES_GPU + + #define MU 0.6 + // in cgs, this is 0.01 cm^{-3} + #define MIN_DENSITY (0.01 * MP * MU * LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT / MASS_UNIT) // 148279.7 + #define TPB_ANALYSIS 1024 + +__device__ void Warp_Reduce(volatile Real *buff, size_t tid) +{ + if (TPB_ANALYSIS >= 64) { + buff[tid] += buff[tid + 32]; + } + if (TPB_ANALYSIS >= 32) { + buff[tid] += buff[tid + 16]; + } + if (TPB_ANALYSIS >= 16) { + buff[tid] += buff[tid + 8]; + } + if (TPB_ANALYSIS >= 8) { + buff[tid] += buff[tid + 4]; + } + if (TPB_ANALYSIS >= 4) { + buff[tid] += buff[tid + 2]; + } + if (TPB_ANALYSIS >= 2) { + buff[tid] += buff[tid + 1]; + } +} + +void __global__ Reduce_Tubulence_kernel(int nx, int ny, int nz, int n_ghost, Real *density, Real *momentum_x, + Real *momentum_y, Real *momentum_z, Real *circ_vel_x, Real *circ_vel_y, + Real *partial_mass, Real *partial_vel) +{ + __shared__ Real s_mass[TPB_ANALYSIS]; + __shared__ Real s_vel[TPB_ANALYSIS]; + int id, zid, yid, xid, tid; + + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + tid = threadIdx.x; + + s_mass[tid] = 0; + s_vel[tid] = 0; + Real vx, vy, vz; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost && density[id] > MIN_DENSITY) { + s_mass[tid] = density[id]; + vx = momentum_x[id] / density[id]; + vy = momentum_y[id] / density[id]; + vz = momentum_z[id] / density[id]; + s_vel[tid] = + ((vx - circ_vel_x[id]) * (vx - circ_vel_x[id]) + (vy - circ_vel_y[id]) * (vy - circ_vel_y[id]) + (vz * vz)) * + density[id]; + } + __syncthreads(); + + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + s_mass[tid] += s_mass[tid + s]; + s_vel[tid] += s_vel[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + // printf("ReduceKernel 1: blockIdx.x = %d -> s_mass[0] = %.5e, s_vel[0] = + // %.5e\n", blockIdx.x, 
s_mass[0], s_vel[0]); + partial_mass[blockIdx.x] = s_mass[0]; + partial_vel[blockIdx.x] = s_vel[0]; + } +} + +void __global__ Reduce_Tubulence_kernel_2(Real *input_m, Real *input_v, Real *output_m, Real *output_v, int n) +{ + __shared__ Real s_mass[TPB_ANALYSIS]; + __shared__ Real s_vel[TPB_ANALYSIS]; + + size_t tid = threadIdx.x; + size_t i = blockIdx.x * (TPB_ANALYSIS) + tid; + size_t gridSize = TPB_ANALYSIS * gridDim.x; + s_mass[tid] = 0; + s_vel[tid] = 0; + + while (i < n) { + s_mass[tid] += input_m[i]; + s_vel[tid] += input_v[i]; + i += gridSize; + } + __syncthreads(); + + if (TPB_ANALYSIS >= 1024) { + if (tid < 512) { + s_mass[tid] += s_mass[tid + 512]; + s_vel[tid] += s_vel[tid + 512]; + } + __syncthreads(); + } + if (TPB_ANALYSIS >= 512) { + if (tid < 256) { + s_mass[tid] += s_mass[tid + 256]; + s_vel[tid] += s_vel[tid + 256]; + } + __syncthreads(); + } + if (TPB_ANALYSIS >= 256) { + if (tid < 128) { + s_mass[tid] += s_mass[tid + 128]; + s_vel[tid] += s_vel[tid + 128]; + } + __syncthreads(); + } + if (TPB_ANALYSIS >= 128) { + if (tid < 64) { + s_mass[tid] += s_mass[tid + 64]; + s_vel[tid] += s_vel[tid + 64]; + } + __syncthreads(); + } + + if (tid < 32) { + Warp_Reduce(s_mass, tid); + Warp_Reduce(s_vel, tid); + } + __syncthreads(); + + if (tid == 0) { + // printf("Reduce_Tubulence_kernel 2: n = %d/%d, blockIdx.x = %d -> + // s_mass[0] = %.5e, s_vel[0] = %.5e\n", + // n, gridDim.x, blockIdx.x, s_mass[0], s_vel[0]); + output_m[blockIdx.x] = s_mass[0]; + output_v[blockIdx.x] = s_vel[0]; + } +} + +void FeedbackAnalysis::Compute_Gas_Velocity_Dispersion_GPU(Grid3D &G) +{ + size_t ngrid = std::ceil((1. 
* G.H.nx * G.H.ny * G.H.nz) / TPB_ANALYSIS); + + Real *d_partial_mass; + Real *d_partial_vel; + Real *h_partial_mass = (Real *)malloc(ngrid * sizeof(Real)); + Real *h_partial_vel = (Real *)malloc(ngrid * sizeof(Real)); + GPU_Error_Check(cudaMalloc((void **)&d_partial_mass, ngrid * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&d_partial_vel, ngrid * sizeof(Real))); + + Real total_mass = 0; + Real total_vel = 0; + + hipLaunchKernelGGL(Reduce_Tubulence_kernel, ngrid, TPB_ANALYSIS, 0, 0, G.H.nx, G.H.ny, G.H.nz, G.H.n_ghost, + G.C.d_density, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, d_circ_vel_x, d_circ_vel_y, + d_partial_mass, d_partial_vel); + + size_t n = ngrid; + Real *mass_input = d_partial_mass; + Real *vel_input = d_partial_vel; + while (n > TPB_ANALYSIS) { + ngrid = std::ceil((n * 1.) / TPB_ANALYSIS); + // printf("Reduce_Tubulence: Next kernel call grid size is %d\n", ngrid); + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, ngrid, TPB_ANALYSIS, 0, 0, mass_input, vel_input, d_partial_mass, + d_partial_vel, n); + mass_input = d_partial_mass; + vel_input = d_partial_vel; + n = ngrid; + } + + if (n > 1) { + hipLaunchKernelGGL(Reduce_Tubulence_kernel_2, 1, TPB_ANALYSIS, 0, 0, d_partial_mass, d_partial_vel, d_partial_mass, + d_partial_vel, n); + } + + // cudaDeviceSynchronize(); + + GPU_Error_Check(cudaMemcpy(h_partial_mass, d_partial_mass, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(h_partial_vel, d_partial_vel, ngrid * sizeof(Real), cudaMemcpyDeviceToHost)); + + #ifdef MPI_CHOLLA + MPI_Allreduce(h_partial_mass, &total_mass, 1, MPI_CHREAL, MPI_SUM, world); + MPI_Allreduce(h_partial_vel, &total_vel, 1, MPI_CHREAL, MPI_SUM, world); + #else + total_mass = h_partial_mass[0]; + total_vel = h_partial_vel[0]; + #endif + + if (total_vel < 0 || total_mass < 0) { + chprintf("feedback trouble. 
total_vel = %.3e, total_mass = %.3e\n", total_vel, total_mass); + } + + chprintf("feedback: time %f, dt=%f, vrms = %f km/s\n", G.H.t, G.H.dt, + sqrt(total_vel / total_mass) * VELOCITY_UNIT / 1e5); + + GPU_Error_Check(cudaFree(d_partial_vel)); + GPU_Error_Check(cudaFree(d_partial_mass)); + + free(h_partial_mass); + free(h_partial_vel); +} + +#endif // PARTICLES_GPU diff --git a/src/analysis/io_analysis.cpp b/src/analysis/io_analysis.cpp index 3f0141c05..962503dea 100644 --- a/src/analysis/io_analysis.cpp +++ b/src/analysis/io_analysis.cpp @@ -1,18 +1,17 @@ #ifdef ANALYSIS -#include -#include -#include "../analysis/analysis.h" -#include "../io/io.h" -#include "../grid/grid3D.h" + #include + #include -using namespace std; + #include "../analysis/analysis.h" + #include "../grid/grid3D.h" + #include "../io/io.h" // #define OUTPUT_SKEWERS_TRANSMITTED_FLUX -#ifdef OUTPUT_SKEWERS -void Grid3D::Output_Skewers_File( struct parameters *P ){ - + #ifdef OUTPUT_SKEWERS +void Grid3D::Output_Skewers_File(struct Parameters *P) +{ FILE *out; char filename[180]; char timestep[20]; @@ -20,439 +19,451 @@ void Grid3D::Output_Skewers_File( struct parameters *P ){ // create the filename strcpy(filename, P->skewersdir); sprintf(timestep, "%d", Analysis.n_file); - strcat(filename,timestep); + strcat(filename, timestep); // a binary file is created for each process // only one HDF5 file is created - strcat(filename,"_skewers"); - strcat(filename,".h5"); - - + strcat(filename, "_skewers"); + strcat(filename, ".h5"); + chprintf("Writing Skewers File: %d ", Analysis.n_file); - - hid_t file_id; - herr_t status; - + + hid_t file_id; + herr_t status; + // Create a new file collectively file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - Write_Skewers_Header_HDF5( file_id ); - Write_Skewers_Data_HDF5( file_id ); - + Write_Skewers_Header_HDF5(file_id); + Write_Skewers_Data_HDF5(file_id); + // Close the file status = H5Fclose(file_id); - - chprintf("Saved Skewers File.\n"); - -} 
+ chprintf("Saved Skewers File.\n"); +} +void Grid3D::Write_Skewers_Header_HDF5(hid_t file_id) +{ + hid_t attribute_id, dataspace_id; + herr_t status; + hsize_t attr_dims; + int int_data[3]; + Real Real_data[3]; -void Grid3D::Write_Skewers_Header_HDF5( hid_t file_id ){ - hid_t attribute_id, dataspace_id; - herr_t status; - hsize_t attr_dims; - int int_data[3]; - Real Real_data[3]; - - Real H0 = Cosmo.cosmo_h*100; + Real H0 = Cosmo.cosmo_h * 100; // Single attributes first attr_dims = 1; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - #ifdef COSMOLOGY + #ifdef COSMOLOGY attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = 
H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); - status = H5Aclose(attribute_id); - #endif - + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); + status = H5Aclose(attribute_id); + #endif + status = H5Sclose(dataspace_id); - + // 3D atributes now attr_dims = 3; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); - + Real_data[0] = Analysis.Lbox_x; Real_data[1] = Analysis.Lbox_y; Real_data[2] = Analysis.Lbox_z; - - attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); - - status = H5Sclose(dataspace_id); - -} - - + attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); + status = H5Aclose(attribute_id); -void Grid3D::Write_Skewers_Data_HDF5( hid_t file_id ){ + status = H5Sclose(dataspace_id); +} +void Grid3D::Write_Skewers_Data_HDF5(hid_t file_id) +{ int n_global_x, n_global_y, n_global_z; int n_los_x, n_los_y, n_los_z; n_global_x = Analysis.n_skewers_processed_x; n_global_y = Analysis.n_skewers_processed_y; n_global_z = Analysis.n_skewers_processed_z; - n_los_x = Analysis.nx_total; - n_los_y = Analysis.ny_total; - n_los_z = Analysis.nz_total; - + n_los_x = Analysis.nx_total; + n_los_y = Analysis.ny_total; + n_los_z = Analysis.nz_total; + Real *dataset_buffer_x; Real *dataset_buffer_y; Real *dataset_buffer_z; - + int data_id, buffer_id; - - herr_t status; - hid_t dataset_id; - - //Write Skerwes X - dataset_buffer_x = (Real *) 
malloc(n_global_x*n_los_x*sizeof(Real)); - hsize_t dims_x[2]; + + herr_t status; + hid_t dataset_id; + + // Write Skerwes X + dataset_buffer_x = (Real *)malloc(n_global_x * n_los_x * sizeof(Real)); + hsize_t dims_x[2]; dims_x[0] = n_global_x; dims_x[1] = n_los_x; hid_t skewers_group_x, dataspace_id_skewers_x; dataspace_id_skewers_x = H5Screate_simple(2, dims_x, NULL); skewers_group_x = H5Gcreate(file_id, "skewers_x", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - for ( int skewer_id=0; skewer_idanalysisdir); sprintf(timestep, "%d", Analysis.n_file); - strcat(filename,timestep); + strcat(filename, timestep); // a binary file is created for each process // only one HDF5 file is created - strcat(filename,"_analysis"); - strcat(filename,".h5"); - - + strcat(filename, "_analysis"); + strcat(filename, ".h5"); + chprintf("Writing Analysis File: %d ", Analysis.n_file); - - hid_t file_id; - herr_t status; + + hid_t file_id; + herr_t status; // Create a new file collectively file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - Write_Analysis_Header_HDF5( file_id ); - Write_Analysis_Data_HDF5( file_id ); + Write_Analysis_Header_HDF5(file_id); + Write_Analysis_Data_HDF5(file_id); // Close the file status = H5Fclose(file_id); chprintf("Saved Analysis File.\n\n"); - } - -void Grid3D::Write_Analysis_Header_HDF5( hid_t file_id ){ - hid_t attribute_id, dataspace_id; - herr_t status; - hsize_t attr_dims; - int int_data[3]; - Real Real_data[3]; - +void Grid3D::Write_Analysis_Header_HDF5(hid_t file_id) +{ + hid_t attribute_id, dataspace_id; + herr_t status; + hsize_t attr_dims; + int int_data[3]; + Real Real_data[3]; // Single attributes first attr_dims = 1; // Create the data space for the attribute dataspace_id = H5Screate_simple(1, &attr_dims, NULL); #ifdef COSMOLOGY - Real H0 = Cosmo.cosmo_h*100; + Real H0 = Cosmo.cosmo_h * 100; attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, 
H5T_NATIVE_DOUBLE, &Particles.current_a); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_a); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.current_z); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "H0", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &H0); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "Omega_M", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_M); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "Omega_L", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_L); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "Omega_b", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.Omega_b); + status = H5Aclose(attribute_id); #endif - + status = H5Sclose(dataspace_id); // 3D atributes now @@ -531,157 +539,140 @@ void Grid3D::Write_Analysis_Header_HDF5( hid_t file_id ){ Real_data[2] = Analysis.Lbox_z; attribute_id = H5Acreate(file_id, "Lbox", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, 
H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); - status = H5Aclose(attribute_id); - + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, Real_data); + status = H5Aclose(attribute_id); + status = H5Sclose(dataspace_id); - } - - -void Grid3D::Write_Analysis_Data_HDF5( hid_t file_id ){ - - - herr_t status; - hid_t dataset_id, dataspace_id, group_id, attribute_id; - hsize_t dims2d[2]; - hsize_t attr_dims; +void Grid3D::Write_Analysis_Data_HDF5(hid_t file_id) +{ + herr_t status; + hid_t dataset_id, dataspace_id, group_id, attribute_id; + hsize_t dims2d[2]; + hsize_t attr_dims; int nx_dset, ny_dset, j, i, id, buf_id; #ifdef PHASE_DIAGRAM - nx_dset = Analysis.n_temp; - ny_dset = Analysis.n_dens; - float *dataset_buffer = (float *) malloc(nx_dset*ny_dset*sizeof(Real)); - + nx_dset = Analysis.n_temp; + ny_dset = Analysis.n_dens; + float *dataset_buffer = (float *)malloc(nx_dset * ny_dset * sizeof(Real)); // Create the data space for the datasets - dims2d[0] = nx_dset; - dims2d[1] = ny_dset; + dims2d[0] = nx_dset; + dims2d[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims2d, NULL); group_id = H5Gcreate(file_id, "/phase_diagram", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - for (j=0; janalysis_scale_outputs_file); - chprintf( " Loading Analysis Scale_Factor Outpus: %s\n", filename_1); + chprintf(" Loading Analysis Scale_Factor Outpus: %s\n", filename_1); - ifstream file_out ( filename_1 ); + ifstream file_out(filename_1); string line; Real a_value, current_a; - if (file_out.is_open()){ - while ( getline (file_out,line) ){ - a_value = atof( line.c_str() ); - scale_outputs.push_back( a_value ); + if (file_out.is_open()) { + while (getline(file_out, line)) { + a_value = atof(line.c_str()); + scale_outputs.push_back(a_value); n_outputs += 1; // chprintf("%f\n", a_value); } file_out.close(); - n_outputs = scale_outputs.size(); + n_outputs = scale_outputs.size(); next_output_indx = 0; chprintf(" Loaded %d scale outputs \n", n_outputs); - } - else{ + 
} else { chprintf(" Error: Unable to open cosmology outputs file\n"); exit(1); } @@ -689,42 +680,45 @@ void Analysis_Module::Load_Scale_Outputs( struct parameters *P ) { chprintf(" Setting next analysis output\n"); int scale_indx = next_output_indx; - current_a = 1. / ( 1 + current_z ); - a_value = scale_outputs[scale_indx]; + current_a = 1. / (1 + current_z); + a_value = scale_outputs[scale_indx]; - while ( (current_a - a_value) > 1e-4 ){ + while ((current_a - a_value) > 1e-4) { // chprintf( "%f %f\n", a_value, current_a); scale_indx += 1; a_value = scale_outputs[scale_indx]; } next_output_indx = scale_indx; - next_output = a_value; - chprintf(" Next output scale index: %d \n", next_output_indx ); + next_output = a_value; + chprintf(" Next output scale index: %d \n", next_output_indx); chprintf(" Next output scale value: %f \n", next_output); - if ( fabs(current_a - next_output) > 1e-4 ) Output_Now = false; - else Output_Now = true; + if (fabs(current_a - next_output) > 1e-4) + Output_Now = false; + else + Output_Now = true; n_file = next_output_indx; - } -void Analysis_Module::Set_Next_Scale_Output( ){ - +void AnalysisModule::Set_Next_Scale_Output() +{ int scale_indx = next_output_indx; Real a_value, current_a; - current_a = 1. / ( 1 + current_z ); - a_value = scale_outputs[scale_indx]; - if ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx = 1; - else scale_indx += 1; + current_a = 1. 
/ (1 + current_z); + a_value = scale_outputs[scale_indx]; + if ((scale_indx == 0) && (abs(a_value - current_a) < 1e-5)) + scale_indx = 1; + else + scale_indx += 1; a_value = scale_outputs[scale_indx]; next_output_indx = scale_indx; - next_output = a_value; - n_file = next_output_indx; + next_output = a_value; + n_file = next_output_indx; // chprintf("Next Analysis Output: z=%f \n", 1./next_output - 1); } -#endif //COSMOLOGY + #endif // COSMOLOGY #endif diff --git a/src/analysis/lya_statistics.cpp b/src/analysis/lya_statistics.cpp index 35f8ce337..968011bae 100644 --- a/src/analysis/lya_statistics.cpp +++ b/src/analysis/lya_statistics.cpp @@ -1,21 +1,22 @@ #ifdef ANALYSIS -#ifdef LYA_STATISTICS + #ifdef LYA_STATISTICS -#include "../analysis/analysis.h" -#include "../io/io.h" -#include -#include + #include + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include "../analysis/analysis.h" + #include "../io/io.h" + + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif // #define PRINT_ANALYSIS_LOG -void Analysis_Module::Transfer_Skewers_Global_Axis( int axis ){ - +void AnalysisModule::Transfer_Skewers_Global_Axis(int axis) +{ bool am_I_root; - int n_skewers_root, n_los; + int n_skewers_root, n_los; bool *root_procs; Real *skewers_density_root; Real *skewers_density_global; @@ -32,395 +33,381 @@ void Analysis_Module::Transfer_Skewers_Global_Axis( int axis ){ Real *skewers_F_HeII_global; Real *skewers_F_HeII_root; Real *transfer_buffer; - - + // chprintf( " Transfering Skewers \n" ); - - if ( axis == 0 ){ - am_I_root = am_I_root_x; - n_los = nx_total; - root_procs = root_procs_x; - n_skewers_root = n_skewers_local_x; - skewers_density_root = skewers_density_root_x; - skewers_density_global = skewers_density_x_global; - skewers_HI_density_root = skewers_HI_density_root_x; - skewers_HI_density_global = skewers_HI_density_x_global; + + if (axis == 0) { + am_I_root = am_I_root_x; + n_los = nx_total; + root_procs = root_procs_x; + 
n_skewers_root = n_skewers_local_x; + skewers_density_root = skewers_density_root_x; + skewers_density_global = skewers_density_x_global; + skewers_HI_density_root = skewers_HI_density_root_x; + skewers_HI_density_global = skewers_HI_density_x_global; skewers_HeII_density_root = skewers_HeII_density_root_x; skewers_HeII_density_global = skewers_HeII_density_x_global; - skewers_temperature_root = skewers_temperature_root_x; - skewers_temperature_global = skewers_temperature_x_global; + skewers_temperature_root = skewers_temperature_root_x; + skewers_temperature_global = skewers_temperature_x_global; skewers_los_velocity_root = skewers_velocity_root_x; skewers_los_velocity_global = skewers_los_velocity_x_global; - skewers_F_HI_global = skewers_transmitted_flux_HI_x_global; - skewers_F_HeII_global = skewers_transmitted_flux_HeII_x_global; - skewers_F_HI_root = skewers_transmitted_flux_HI_x; - skewers_F_HeII_root = skewers_transmitted_flux_HeII_x; - transfer_buffer = transfer_buffer_root_x; - } - if ( axis == 1 ){ - am_I_root = am_I_root_y; - n_los = ny_total; - root_procs = root_procs_y; - n_skewers_root = n_skewers_local_y; - skewers_density_root = skewers_density_root_y; - skewers_density_global = skewers_density_y_global; - skewers_HI_density_root = skewers_HI_density_root_y; - skewers_HI_density_global = skewers_HI_density_y_global; + skewers_F_HI_global = skewers_transmitted_flux_HI_x_global; + skewers_F_HeII_global = skewers_transmitted_flux_HeII_x_global; + skewers_F_HI_root = skewers_transmitted_flux_HI_x; + skewers_F_HeII_root = skewers_transmitted_flux_HeII_x; + transfer_buffer = transfer_buffer_root_x; + } + if (axis == 1) { + am_I_root = am_I_root_y; + n_los = ny_total; + root_procs = root_procs_y; + n_skewers_root = n_skewers_local_y; + skewers_density_root = skewers_density_root_y; + skewers_density_global = skewers_density_y_global; + skewers_HI_density_root = skewers_HI_density_root_y; + skewers_HI_density_global = skewers_HI_density_y_global; 
skewers_HeII_density_root = skewers_HeII_density_root_y; skewers_HeII_density_global = skewers_HeII_density_y_global; - skewers_temperature_root = skewers_temperature_root_y; - skewers_temperature_global = skewers_temperature_y_global; + skewers_temperature_root = skewers_temperature_root_y; + skewers_temperature_global = skewers_temperature_y_global; skewers_los_velocity_root = skewers_velocity_root_y; skewers_los_velocity_global = skewers_los_velocity_y_global; - skewers_F_HI_global = skewers_transmitted_flux_HI_y_global; - skewers_F_HeII_global = skewers_transmitted_flux_HeII_y_global; - skewers_F_HI_root = skewers_transmitted_flux_HI_y; - skewers_F_HeII_root = skewers_transmitted_flux_HeII_y; - transfer_buffer = transfer_buffer_root_y; - } - if ( axis == 2 ){ - am_I_root = am_I_root_z; - n_los = nz_total; - root_procs = root_procs_z; - n_skewers_root = n_skewers_local_z; - skewers_density_root = skewers_density_root_z; - skewers_density_global = skewers_density_z_global; - skewers_HI_density_root = skewers_HI_density_root_z; - skewers_HI_density_global = skewers_HI_density_z_global; + skewers_F_HI_global = skewers_transmitted_flux_HI_y_global; + skewers_F_HeII_global = skewers_transmitted_flux_HeII_y_global; + skewers_F_HI_root = skewers_transmitted_flux_HI_y; + skewers_F_HeII_root = skewers_transmitted_flux_HeII_y; + transfer_buffer = transfer_buffer_root_y; + } + if (axis == 2) { + am_I_root = am_I_root_z; + n_los = nz_total; + root_procs = root_procs_z; + n_skewers_root = n_skewers_local_z; + skewers_density_root = skewers_density_root_z; + skewers_density_global = skewers_density_z_global; + skewers_HI_density_root = skewers_HI_density_root_z; + skewers_HI_density_global = skewers_HI_density_z_global; skewers_HeII_density_root = skewers_HeII_density_root_z; skewers_HeII_density_global = skewers_HeII_density_z_global; - skewers_temperature_root = skewers_temperature_root_z; - skewers_temperature_global = skewers_temperature_z_global; + 
skewers_temperature_root = skewers_temperature_root_z; + skewers_temperature_global = skewers_temperature_z_global; skewers_los_velocity_root = skewers_velocity_root_z; skewers_los_velocity_global = skewers_los_velocity_z_global; - skewers_F_HI_global = skewers_transmitted_flux_HI_z_global; - skewers_F_HeII_global = skewers_transmitted_flux_HeII_z_global; - skewers_F_HI_root = skewers_transmitted_flux_HI_z; - skewers_F_HeII_root = skewers_transmitted_flux_HeII_z; - transfer_buffer = transfer_buffer_root_z; + skewers_F_HI_global = skewers_transmitted_flux_HI_z_global; + skewers_F_HeII_global = skewers_transmitted_flux_HeII_z_global; + skewers_F_HI_root = skewers_transmitted_flux_HI_z; + skewers_F_HeII_root = skewers_transmitted_flux_HeII_z; + transfer_buffer = transfer_buffer_root_z; } - - if ( !am_I_root ) return; + + if (!am_I_root) return; MPI_Status mpi_status; int n_added, offset; - #ifdef OUTPUT_SKEWERS + #ifdef OUTPUT_SKEWERS // Set the density array - if ( procID == 0){ + if (procID == 0) { // Write the local data into the global array - for ( int skewer_id=0; skewer_id values[N-1] ) return -1; + if (val < values[0]) return -2; + if (val > values[N - 1]) return -1; int index = 0; - while ( index < N ){ - if ( val < values[index] ) break; + while (index < N) { + if (val < values[index]) break; index += 1; } - if ( val < values[index-1] ){ - chprintf( "ERROR; Value less than left edge: val=%f left=%f \n", val, values[index-1] ); + if (val < values[index - 1]) { + chprintf("ERROR; Value less than left edge: val=%f left=%f \n", val, values[index - 1]); exit(-1); } - if ( val > values[index] ){ - chprintf( "ERROR; Value grater than right edge: val=%f right=%f \n", val, values[index] ); + if (val > values[index]) { + chprintf("ERROR; Value grater than right edge: val=%f right=%f \n", val, values[index]); exit(-1); } - // chprintf( " %d: %e %e %e \n ", index, values[index-1], val, values[index]); - return index-1; - + // chprintf( " %d: %e %e %e \n ", index, 
values[index-1], val, + // values[index]); + return index - 1; } -void Analysis_Module::Clear_Power_Spectrum_Measurements( void ){ - - MPI_Barrier( world ); +void AnalysisModule::Clear_Power_Spectrum_Measurements(void) +{ + MPI_Barrier(world); // chprintf( "Cleared Power Spectrum cache \n "); - free( hist_k_edges_x ); - free( hist_PS_x ); - free( hist_n_x ); - free( ps_root_x ); - free( ps_global_x ); - - free( hist_k_edges_y ); - free( hist_PS_y ); - free( hist_n_y ); - free( ps_root_y ); - free( ps_global_y ); - - free( hist_k_edges_z ); - free( hist_PS_z ); - free( hist_n_z ); - free( ps_root_z ); - free( ps_global_z ); - - free( k_centers ); - free( ps_mean ); - + free(hist_k_edges_x); + free(hist_PS_x); + free(hist_n_x); + free(ps_root_x); + free(ps_global_x); + + free(hist_k_edges_y); + free(hist_PS_y); + free(hist_n_y); + free(ps_root_y); + free(ps_global_y); + + free(hist_k_edges_z); + free(hist_PS_z); + free(hist_n_z); + free(ps_root_z); + free(ps_global_z); + + free(k_centers); + free(ps_mean); } -void Grid3D::Initialize_Power_Spectrum_Measurements( int axis ){ - +void Grid3D::Initialize_Power_Spectrum_Measurements(int axis) +{ int n_los, n_fft; Real Lbox, delta_x; Real *k_vals; - if ( axis == 0 ){ + if (axis == 0) { Analysis.n_PS_processed_x = 0; - n_los = Analysis.nx_total; - n_fft = Analysis.n_fft_x; - Lbox = Analysis.Lbox_x; - delta_x = Analysis.dx; - k_vals = Analysis.k_vals_x; + n_los = Analysis.nx_total; + n_fft = Analysis.n_fft_x; + Lbox = Analysis.Lbox_x; + delta_x = Analysis.dx; + k_vals = Analysis.k_vals_x; } - if ( axis == 1 ){ + if (axis == 1) { Analysis.n_PS_processed_y = 0; - n_los = Analysis.ny_total; - n_fft = Analysis.n_fft_y; - Lbox = Analysis.Lbox_y; - delta_x = Analysis.dy; - k_vals = Analysis.k_vals_y; + n_los = Analysis.ny_total; + n_fft = Analysis.n_fft_y; + Lbox = Analysis.Lbox_y; + delta_x = Analysis.dy; + k_vals = Analysis.k_vals_y; } - if ( axis == 2 ){ + if (axis == 2) { Analysis.n_PS_processed_z = 0; - n_los = 
Analysis.nz_total; - n_fft = Analysis.n_fft_z; - Lbox = Analysis.Lbox_z; - delta_x = Analysis.dz; - k_vals = Analysis.k_vals_z; + n_los = Analysis.nz_total; + n_fft = Analysis.n_fft_z; + Lbox = Analysis.Lbox_z; + delta_x = Analysis.dz; + k_vals = Analysis.k_vals_z; } - // Get Cosmological variables Real H, current_a, L_proper, dx_proper, dv_Hubble; current_a = Cosmo.current_a; - L_proper = Lbox * current_a / Cosmo.cosmo_h; + L_proper = Lbox * current_a / Cosmo.cosmo_h; dx_proper = delta_x * current_a / Cosmo.cosmo_h; - H = Cosmo.Get_Hubble_Parameter( current_a ); - dv_Hubble = H * dx_proper; // km/s - + H = Cosmo.Get_Hubble_Parameter(current_a); + dv_Hubble = H * dx_proper; // km/s // Compute the K values - for ( int i=0; i= n_bins ) continue; + if (k_val == 0) continue; + bin_id = Locate_Index(k_val, hist_k_edges, n_hist_edges); + if (bin_id < 0) chprintf(" %d: %e %e %e \n", bin_id, hist_k_edges[0], k_val, hist_k_edges[1]); + if (bin_id < 0 || bin_id >= n_bins) continue; hist_PS[bin_id] += fft2_delta_F[i]; - hist_n[bin_id] += 1; + hist_n[bin_id] += 1; } int hist_sum = 0; - for ( int i=0; i mpi_indices; MPI_Status mpi_status; - #endif + #endif - if ( axis == 0 ){ - root_id = root_id_x; - am_I_root = am_I_root_x; - n_los_local = nx_local; - n_los_total = nx_total; - n_skewers = n_skewers_local_x; - skewers_HI_density_local = skewers_HI_density_local_x; - skewers_HI_density_root = skewers_HI_density_root_x; + if (axis == 0) { + root_id = root_id_x; + am_I_root = am_I_root_x; + n_los_local = nx_local; + n_los_total = nx_total; + n_skewers = n_skewers_local_x; + skewers_HI_density_local = skewers_HI_density_local_x; + skewers_HI_density_root = skewers_HI_density_root_x; skewers_HeII_density_local = skewers_HeII_density_local_x; skewers_HeII_density_root = skewers_HeII_density_root_x; - skewers_velocity_local = skewers_velocity_local_x; - skewers_temperature_local = skewers_temperature_local_x; - skewers_velocity_root = skewers_velocity_root_x; - 
skewers_temperature_root = skewers_temperature_root_x; + skewers_velocity_local = skewers_velocity_local_x; + skewers_temperature_local = skewers_temperature_local_x; + skewers_velocity_root = skewers_velocity_root_x; + skewers_temperature_root = skewers_temperature_root_x; #ifdef MPI_CHOLLA mpi_indices = mpi_indices_x; #endif #ifdef OUTPUT_SKEWERS - skewers_density_root = skewers_density_root_x; + skewers_density_root = skewers_density_root_x; skewers_density_local = skewers_density_local_x; #endif } - - if ( axis == 1 ){ - root_id = root_id_y; - am_I_root = am_I_root_y; - n_los_local = ny_local; - n_los_total = ny_total; - n_skewers = n_skewers_local_y; - skewers_HI_density_local = skewers_HI_density_local_y; + if (axis == 1) { + root_id = root_id_y; + am_I_root = am_I_root_y; + n_los_local = ny_local; + n_los_total = ny_total; + n_skewers = n_skewers_local_y; + skewers_HI_density_local = skewers_HI_density_local_y; skewers_HeII_density_local = skewers_HeII_density_local_y; skewers_HeII_density_root = skewers_HeII_density_root_y; - skewers_velocity_local = skewers_velocity_local_y; - skewers_temperature_local = skewers_temperature_local_y; - skewers_HI_density_root = skewers_HI_density_root_y; - skewers_velocity_root = skewers_velocity_root_y; - skewers_temperature_root = skewers_temperature_root_y; + skewers_velocity_local = skewers_velocity_local_y; + skewers_temperature_local = skewers_temperature_local_y; + skewers_HI_density_root = skewers_HI_density_root_y; + skewers_velocity_root = skewers_velocity_root_y; + skewers_temperature_root = skewers_temperature_root_y; #ifdef MPI_CHOLLA mpi_indices = mpi_indices_y; #endif #ifdef OUTPUT_SKEWERS - skewers_density_root = skewers_density_root_y; + skewers_density_root = skewers_density_root_y; skewers_density_local = skewers_density_local_y; #endif } - - if ( axis == 2 ){ - root_id = root_id_z; - am_I_root = am_I_root_z; - n_los_local = nz_local; - n_los_total = nz_total; - n_skewers = n_skewers_local_z; - 
skewers_HI_density_local = skewers_HI_density_local_z; + if (axis == 2) { + root_id = root_id_z; + am_I_root = am_I_root_z; + n_los_local = nz_local; + n_los_total = nz_total; + n_skewers = n_skewers_local_z; + skewers_HI_density_local = skewers_HI_density_local_z; skewers_HeII_density_local = skewers_HeII_density_local_z; skewers_HeII_density_root = skewers_HeII_density_root_z; - skewers_velocity_local = skewers_velocity_local_z; - skewers_temperature_local = skewers_temperature_local_z; - skewers_HI_density_root = skewers_HI_density_root_z; - skewers_velocity_root = skewers_velocity_root_z; - skewers_temperature_root = skewers_temperature_root_z; + skewers_velocity_local = skewers_velocity_local_z; + skewers_temperature_local = skewers_temperature_local_z; + skewers_HI_density_root = skewers_HI_density_root_z; + skewers_velocity_root = skewers_velocity_root_z; + skewers_temperature_root = skewers_temperature_root_z; #ifdef MPI_CHOLLA mpi_indices = mpi_indices_z; #endif #ifdef OUTPUT_SKEWERS - skewers_density_root = skewers_density_root_z; + skewers_density_root = skewers_density_root_z; skewers_density_local = skewers_density_local_z; #endif } - // Copy Skewers Local Data to Root data Real HI_density, HeII_density, velocity, temperature; Real density; - - #ifdef MPI_CHOLLA - if ( am_I_root ){ - if ( root_id != procID ){ - printf("ERROR: Root ID doesn't match procID\n" ); + #ifdef MPI_CHOLLA + if (am_I_root) { + if (root_id != procID) { + printf("ERROR: Root ID doesn't match procID\n"); exit(-1); } - for ( int skewer_id=0; skewer_idlya_skewers_stride; - chprintf(" Lya Skewers Stride: %d\n", n_stride ); + chprintf(" Lya Skewers Stride: %d\n", n_stride); d_log_k = P->lya_Pk_d_log_k; - chprintf(" Power Spectrum d_log_k: %f\n", d_log_k ); + chprintf(" Power Spectrum d_log_k: %f\n", d_log_k); - n_skewers_local_x = ( ny_local / n_stride ) * ( nz_local / n_stride ); - n_skewers_local_y = ( nx_local / n_stride ) * ( nz_local / n_stride ); - n_skewers_local_z = ( nx_local 
/ n_stride ) * ( ny_local / n_stride ); + n_skewers_local_x = (ny_local / n_stride) * (nz_local / n_stride); + n_skewers_local_y = (nx_local / n_stride) * (nz_local / n_stride); + n_skewers_local_z = (nx_local / n_stride) * (ny_local / n_stride); - #ifdef MPI_CHOLLA - n_skewers_total_x = ( ny_total / n_stride ) * ( nz_total / n_stride ); - n_skewers_total_y = ( nx_total / n_stride ) * ( nz_total / n_stride ); - n_skewers_total_z = ( nx_total / n_stride ) * ( ny_total / n_stride ); - #else + #ifdef MPI_CHOLLA + n_skewers_total_x = (ny_total / n_stride) * (nz_total / n_stride); + n_skewers_total_y = (nx_total / n_stride) * (nz_total / n_stride); + n_skewers_total_z = (nx_total / n_stride) * (ny_total / n_stride); + #else n_skewers_total_x = n_skewers_local_x; n_skewers_total_y = n_skewers_local_y; n_skewers_total_z = n_skewers_local_z; - #endif - + #endif // Alocate Memory For Properties of Local Skewers - skewers_HI_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_HI_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_HI_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - skewers_HeII_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_HeII_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_HeII_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - skewers_velocity_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_velocity_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_velocity_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - skewers_temperature_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_temperature_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_temperature_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - - 
#ifdef OUTPUT_SKEWERS - skewers_density_local_x = (Real *) malloc(n_skewers_local_x*nx_local*sizeof(Real)); - skewers_density_local_y = (Real *) malloc(n_skewers_local_y*ny_local*sizeof(Real)); - skewers_density_local_z = (Real *) malloc(n_skewers_local_z*nz_local*sizeof(Real)); - #endif - - - // for (int i=0; i 0 ){ + if (n_mpi_x > 0) { sorted = true; - while ( !sorted ){ + while (!sorted) { sorted = true; - for (int i=0; i mpi_domain_boundary_x[mpi_indices_x[i+1]] ){ - temp_indx = mpi_indices_x[i]; - mpi_indices_x[i] = mpi_indices_x[i+1]; - mpi_indices_x[i+1] = temp_indx; - sorted = false; + for (int i = 0; i < n_mpi_x - 1; i++) { + if (mpi_domain_boundary_x[mpi_indices_x[i]] > mpi_domain_boundary_x[mpi_indices_x[i + 1]]) { + temp_indx = mpi_indices_x[i]; + mpi_indices_x[i] = mpi_indices_x[i + 1]; + mpi_indices_x[i + 1] = temp_indx; + sorted = false; } } } } - if ( n_mpi_y > 0 ){ + if (n_mpi_y > 0) { sorted = true; - while ( !sorted ){ + while (!sorted) { sorted = true; - for (int i=0; i mpi_domain_boundary_y[mpi_indices_y[i+1]] ){ - temp_indx = mpi_indices_y[i]; - mpi_indices_y[i] = mpi_indices_y[i+1]; - mpi_indices_y[i+1] = temp_indx; - sorted = false; + for (int i = 0; i < n_mpi_y - 1; i++) { + if (mpi_domain_boundary_y[mpi_indices_y[i]] > mpi_domain_boundary_y[mpi_indices_y[i + 1]]) { + temp_indx = mpi_indices_y[i]; + mpi_indices_y[i] = mpi_indices_y[i + 1]; + mpi_indices_y[i + 1] = temp_indx; + sorted = false; } } } } - if ( n_mpi_z > 0 ){ + if (n_mpi_z > 0) { sorted = true; - while ( !sorted ){ + while (!sorted) { sorted = true; - for (int i=0; i mpi_domain_boundary_z[mpi_indices_z[i+1]] ){ - temp_indx = mpi_indices_z[i]; - mpi_indices_z[i] = mpi_indices_z[i+1]; - mpi_indices_z[i+1] = temp_indx; - sorted = false; + for (int i = 0; i < n_mpi_z - 1; i++) { + if (mpi_domain_boundary_z[mpi_indices_z[i]] > mpi_domain_boundary_z[mpi_indices_z[i + 1]]) { + temp_indx = mpi_indices_z[i]; + mpi_indices_z[i] = mpi_indices_z[i + 1]; + mpi_indices_z[i + 1] = temp_indx; 
+ sorted = false; } } } } - - // for (int i=0; i /* printf */ -#include -#include "../analysis/analysis.h" -#include "../io/io.h" + #include + #include /* printf */ -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include "../analysis/analysis.h" + #include "../io/io.h" -void Grid3D::Compute_Phase_Diagram(){ + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif +void Grid3D::Compute_Phase_Diagram() +{ int n_temp, n_dens; Real temp_min, temp_max, dens_min, dens_max; Real log_temp_min, log_temp_max, log_dens_min, log_dens_max; Real log_delta_dens, log_delta_temp; - n_dens = Analysis.n_dens; - n_temp = Analysis.n_temp; + n_dens = Analysis.n_dens; + n_temp = Analysis.n_temp; dens_min = Analysis.dens_min; dens_max = Analysis.dens_max; temp_min = Analysis.temp_min; temp_max = Analysis.temp_max; - log_dens_min = log10( dens_min ); - log_dens_max = log10( dens_max ); - log_temp_min = log10( temp_min ); - log_temp_max = log10( temp_max ); - - log_delta_dens = ( log_dens_max - log_dens_min ) / n_dens; - log_delta_temp = ( log_temp_max - log_temp_min ) / n_temp; + log_dens_min = log10(dens_min); + log_dens_max = log10(dens_max); + log_temp_min = log10(temp_min); + log_temp_max = log10(temp_max); + log_delta_dens = (log_dens_max - log_dens_min) / n_dens; + log_delta_temp = (log_temp_max - log_temp_min) / n_temp; int nx_local, ny_local, nz_local, n_ghost; int nx_grid, ny_grid, nz_grid; nx_local = Analysis.nx_local; ny_local = Analysis.ny_local; nz_local = Analysis.nz_local; - n_ghost = Analysis.n_ghost; - nx_grid = nx_local + 2*n_ghost; - ny_grid = ny_local + 2*n_ghost; - nz_grid = nz_local + 2*n_ghost; - - + n_ghost = Analysis.n_ghost; + nx_grid = nx_local + 2 * n_ghost; + ny_grid = ny_local + 2 * n_ghost; + nz_grid = nz_local + 2 * n_ghost; Real dens, log_dens, temp, log_temp; int k, j, i, id_grid; int indx_dens, indx_temp, indx_phase; + // Clear Phase Dikagram + for (indx_phase = 0; indx_phase < n_temp * n_dens; indx_phase++) 
Analysis.phase_diagram[indx_phase] = 0; + + for (k = 0; k < nz_local; k++) { + for (j = 0; j < ny_local; j++) { + for (i = 0; i < nx_local; i++) { + id_grid = (i + n_ghost) + (j + n_ghost) * nx_grid + (k + n_ghost) * nx_grid * ny_grid; + dens = C.density[id_grid] * Cosmo.rho_0_gas / Cosmo.rho_mean_baryon; // Baryonic overdensity + // chprintf( "%f %f \n", dens, temp); + #ifdef COOLING_GRACKLE + temp = Cool.temperature[id_grid]; + #elif defined CHEMISTRY_GPU + temp = Chem.Fields.temperature_h[id_grid]; + #else + chprintf( + "ERROR: Temperature Field is only supported for Grackle Cooling or " + "CHEMISTRY_GPU\n"); + exit(-1); + #endif - //Clear Phase Dikagram - for (indx_phase=0; indx_phase dens_max || temp < temp_min || temp > temp_max ){ - // printf("Outside Phase Diagram: dens:%e temp:%e \n", dens, temp ); - continue; - } - log_dens = log10(dens); - log_temp = log10(temp); - indx_dens = ( log_dens - log_dens_min ) / log_delta_dens; - indx_temp = ( log_temp - log_temp_min ) / log_delta_temp; - - indx_phase = indx_temp + indx_dens*n_temp; - if ( indx_phase >= n_dens*n_temp || indx_phase < 0 ){ - printf("Index outside Phase Diagram: indx:%d N:%d dens:%e temp:%e indx_dens:%d indx_temp:%d \n", indx_phase, n_dens*n_temp, dens, temp, indx_dens, indx_temp ); - continue; - } - Analysis.phase_diagram[indx_phase] += 1; - + if (dens < dens_min || dens > dens_max || temp < temp_min || temp > temp_max) { + // printf("Outside Phase Diagram: dens:%e temp:%e \n", dens, temp + // ); + continue; + } + log_dens = log10(dens); + log_temp = log10(temp); + indx_dens = (log_dens - log_dens_min) / log_delta_dens; + indx_temp = (log_temp - log_temp_min) / log_delta_temp; + + indx_phase = indx_temp + indx_dens * n_temp; + if (indx_phase >= n_dens * n_temp || indx_phase < 0) { + printf( + "Index outside Phase Diagram: indx:%d N:%d dens:%e temp:%e " + " indx_dens:%d indx_temp:%d \n", + indx_phase, n_dens * n_temp, dens, temp, indx_dens, indx_temp); + continue; + } + 
Analysis.phase_diagram[indx_phase] += 1; } } } // Real phase_sum_local = 0; - // for (indx_phase=0; indx_phaseH0; + + #ifdef COSMOLOGY + Chem.H.H0 = P->H0; Chem.H.Omega_M = P->Omega_M; Chem.H.Omega_L = P->Omega_L; - - + #endif // COSMOLOGY + // Set up the units system. Real Msun, kpc_cgs, kpc_km, dens_to_CGS; - Msun = MSUN_CGS; - kpc_cgs = KPC_CGS; - kpc_km = KPC; - dens_to_CGS = Cosmo.rho_0_gas * Msun / kpc_cgs / kpc_cgs / kpc_cgs * Cosmo.cosmo_h * Cosmo.cosmo_h; - + Msun = MSUN_CGS; + kpc_cgs = KPC_CGS; + kpc_km = KPC; + dens_to_CGS = Msun / kpc_cgs / kpc_cgs / kpc_cgs; + #ifdef COSMOLOGY + dens_to_CGS = dens_to_CGS * Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h; + #endif // COSMOLOGY + // These are conversions from code units to cgs. Following Grackle - Chem.H.a_value = Cosmo.current_a; - Chem.H.density_units = dens_to_CGS / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value ; - Chem.H.length_units = kpc_cgs / Cosmo.cosmo_h * Chem.H.a_value; - Chem.H.time_units = kpc_km / Cosmo.cosmo_h ; - Chem.H.velocity_units = Chem.H.length_units /Chem.H.time_units; - Chem.H.dens_number_conv = Chem.H.density_units * pow(Chem.H.a_value, 3) / MH; - + Chem.H.density_units = dens_to_CGS; + Chem.H.length_units = kpc_cgs; + Chem.H.time_units = kpc_km; + Chem.H.dens_number_conv = Chem.H.density_units / MH; + #ifdef COSMOLOGY + Chem.H.a_value = Cosmo.current_a; + Chem.H.density_units = Chem.H.density_units / Chem.H.a_value / Chem.H.a_value / Chem.H.a_value; + Chem.H.length_units = Chem.H.length_units / Cosmo.cosmo_h * Chem.H.a_value; + Chem.H.time_units = Chem.H.time_units / Cosmo.cosmo_h; + Chem.H.dens_number_conv = Chem.H.density_number_conv * pow(Chem.H.a_value, 3); + #endif // COSMOLOGY + Chem.H.velocity_units = Chem.H.length_units / Chem.H.time_units; + Real dens_base, length_base, time_base; - dens_base = Chem.H.density_units * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value; - length_base = Chem.H.length_units / Chem.H.a_value; - time_base = Chem.H.time_units; - 
Chem.H.cooling_units = ( pow(length_base, 2) * pow(MH, 2) ) / ( dens_base * pow(time_base, 3) ); - Chem.H.reaction_units = MH / (dens_base * time_base ); + dens_base = Chem.H.density_units; + length_base = Chem.H.length_units; + #ifdef COSMOLOGY + dens_base = dens_base * Chem.H.a_value * Chem.H.a_value * Chem.H.a_value; + length_base = length_base / Chem.H.a_value; + #endif // COSMOLOGY + + time_base = Chem.H.time_units; + Chem.H.cooling_units = (pow(length_base, 2) * pow(MH, 2)) / (dens_base * pow(time_base, 3)); + Chem.H.reaction_units = MH / (dens_base * time_base); // printf(" cooling_units: %e\n", Chem.H.cooling_units ); // printf(" reaction_units: %e\n", Chem.H.reaction_units ); - + Chem.H.max_iter = 10000; - + // Initialize all the rates - Chem.Initialize( P ); - + Chem.Initialize(P); + #ifdef COSMOLOGY // Real kpc_cgs = KPC_CGS; - Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / pow( kpc_cgs, 3) * MSUN_CGS ; - Chem.H.energy_conversion = Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10; //km^2 -> cm^2 ; - #else // Not COSMOLOGY + Chem.H.density_conversion = Cosmo.rho_0_gas * Cosmo.cosmo_h * Cosmo.cosmo_h / pow(kpc_cgs, 3) * MSUN_CGS; + Chem.H.energy_conversion = Cosmo.v_0_gas * Cosmo.v_0_gas * 1e10; // km^2 -> cm^2 ; + #else // Not COSMOLOGY Chem.H.density_conversion = 1.0; Chem.H.energy_conversion = 1.0; #endif - Chem.H.n_uvb_rates_samples = Chem.n_uvb_rates_samples; - Chem.H.uvb_rates_redshift_d = Chem.rates_z_d; - Chem.H.photo_ion_HI_rate_d = Chem.Ion_rates_HI_d; - Chem.H.photo_ion_HeI_rate_d = Chem.Ion_rates_HeI_d; - Chem.H.photo_ion_HeII_rate_d = Chem.Ion_rates_HeII_d; + Chem.H.n_uvb_rates_samples = Chem.n_uvb_rates_samples; + Chem.H.uvb_rates_redshift_d = Chem.rates_z_d; + Chem.H.photo_ion_HI_rate_d = Chem.Ion_rates_HI_d; + Chem.H.photo_ion_HeI_rate_d = Chem.Ion_rates_HeI_d; + Chem.H.photo_ion_HeII_rate_d = Chem.Ion_rates_HeII_d; Chem.H.photo_heat_HI_rate_d = Chem.Heat_rates_HI_d; Chem.H.photo_heat_HeI_rate_d = Chem.Heat_rates_HeI_d; 
Chem.H.photo_heat_HeII_rate_d = Chem.Heat_rates_HeII_d; - - chprintf( "Allocating Memory. \n\n"); - int n_cells = H.nx * H.ny * H.nz; - Chem.Fields.temperature_h = (Real *) malloc(n_cells * sizeof(Real)); - - chprintf( "Chemistry Solver Successfully Initialized. \n\n"); + chprintf("Allocating Memory. \n\n"); + int n_cells = H.nx * H.ny * H.nz; + Chem.Fields.temperature_h = (Real *)malloc(n_cells * sizeof(Real)); + + chprintf("Chemistry Solver Successfully Initialized. \n\n"); } +void Chem_GPU::Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units) +{ + // Host array for storing the rates + Real *rate_table_array_h = (Real *)malloc(H.N_Temp_bins * sizeof(Real)); -void Chem_GPU::Generate_Reaction_Rate_Table( Real **rate_table_array_d, Rate_Function_T rate_function, Real units ){ - - // Host array for storing the rates - Real *rate_table_array_h = (Real *) malloc( H.N_Temp_bins * sizeof(Real) ); - - //Get the temperature spacing. + // Get the temperature spacing. Real T, logT, logT_start, d_logT; logT_start = log(H.Temp_start); - d_logT = ( log(H.Temp_end) - logT_start ) / ( H.N_Temp_bins - 1 ); - + d_logT = (log(H.Temp_end) - logT_start) / (H.N_Temp_bins - 1); + // Evaluate the rate at each temperature. 
- for (int i=0; i 1e7 ) chprintf( "Temperature: %e mu: %e \n", temp, mu ); - + // if ( temp > 1e7 ) chprintf( "Temperature: %e mu: %e \n", temp, mu + // ); } - } - } - + } + } } -void Chem_GPU::Reset(){ - - free( rates_z_h ); - free( Heat_rates_HI_h ); - free( Heat_rates_HeI_h ); - free( Heat_rates_HeII_h ); - free( Ion_rates_HI_h ); - free( Ion_rates_HeI_h ); - free( Ion_rates_HeII_h ); - - Free_Array_GPU_float( rates_z_d ); - Free_Array_GPU_float( Heat_rates_HI_d ); - Free_Array_GPU_float( Heat_rates_HeI_d ); - Free_Array_GPU_float( Heat_rates_HeII_d ); - Free_Array_GPU_float( Ion_rates_HI_d ); - Free_Array_GPU_float( Ion_rates_HeI_d ); - Free_Array_GPU_float( Ion_rates_HeII_d ); - - free( Fields.temperature_h ); - +void Chem_GPU::Reset() +{ + free(rates_z_h); + free(Heat_rates_HI_h); + free(Heat_rates_HeI_h); + free(Heat_rates_HeII_h); + free(Ion_rates_HI_h); + free(Ion_rates_HeI_h); + free(Ion_rates_HeII_h); + + Free_Array_GPU_float(rates_z_d); + Free_Array_GPU_float(Heat_rates_HI_d); + Free_Array_GPU_float(Heat_rates_HeI_d); + Free_Array_GPU_float(Heat_rates_HeII_d); + Free_Array_GPU_float(Ion_rates_HI_d); + Free_Array_GPU_float(Ion_rates_HeI_d); + Free_Array_GPU_float(Ion_rates_HeII_d); + + free(Fields.temperature_h); } - - - - - - - - - - -#endif \ No newline at end of file +#endif diff --git a/src/chemistry_gpu/chemistry_functions_gpu.cu b/src/chemistry_gpu/chemistry_functions_gpu.cu index 3d6e0052f..7c9bfe2cf 100644 --- a/src/chemistry_gpu/chemistry_functions_gpu.cu +++ b/src/chemistry_gpu/chemistry_functions_gpu.cu @@ -1,497 +1,518 @@ #ifdef CHEMISTRY_GPU -#include "chemistry_gpu.h" -#include "../hydro/hydro_cuda.h" -#include "../global/global_cuda.h" -#include "../io/io.h" -#include "rates.cuh" -#include "rates_Katz95.cuh" - -#define eV_to_K 1.160451812e4 -#define K_to_eV 8.617333263e-5 -#define n_min 1e-20 -#define tiny 1e-20 - -#define TPB_CHEM 256 - -void Chem_GPU::Allocate_Array_GPU_float( float **array_dev, int size ){ -cudaMalloc( 
(void**)array_dev, size*sizeof(float)); -CudaCheckError(); -} + #include "../global/global_cuda.h" + #include "../grid/grid_enum.h" + #include "../hydro/hydro_cuda.h" + #include "../io/io.h" + #include "chemistry_gpu.h" + #include "rates.cuh" + #include "rates_Katz95.cuh" -void Chem_GPU::Copy_Float_Array_to_Device( int size, float *array_h, float *array_d ){ -CudaSafeCall( cudaMemcpy(array_d, array_h, size*sizeof(float), cudaMemcpyHostToDevice ) ); -cudaDeviceSynchronize(); -} + #define eV_to_K 1.160451812e4 + #define K_to_eV 8.617333263e-5 + #define n_min 1e-20 + #define tiny 1e-20 -void Chem_GPU::Free_Array_GPU_float( float *array_dev ){ -cudaFree( array_dev ); -CudaCheckError(); -} + #define TPB_CHEM 256 -void Chem_GPU::Allocate_Array_GPU_Real( Real **array_dev, int size ){ -cudaMalloc( (void**)array_dev, size*sizeof(Real)); -CudaCheckError(); +void Chem_GPU::Allocate_Array_GPU_float(float **array_dev, int size) +{ + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(float))); } -void Chem_GPU::Copy_Real_Array_to_Device( int size, Real *array_h, Real *array_d ){ -CudaSafeCall( cudaMemcpy(array_d, array_h, size*sizeof(Real), cudaMemcpyHostToDevice ) ); -cudaDeviceSynchronize(); +void Chem_GPU::Copy_Float_Array_to_Device(int size, float *array_h, float *array_d) +{ + GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(float), cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); } -void Chem_GPU::Free_Array_GPU_Real( Real *array_dev ){ -cudaFree( array_dev ); -CudaCheckError(); -} +void Chem_GPU::Free_Array_GPU_float(float *array_dev) { GPU_Error_Check(cudaFree(array_dev)); } -class Thermal_State{ -public: - -Real U; -Real d; -Real d_HI; -Real d_HII; -Real d_HeI; -Real d_HeII; -Real d_HeIII; -Real d_e; - -// Constructor -__host__ __device__ Thermal_State( Real U_0=1, Real d_0=1, Real d_HI_0=1, Real d_HII_0=0, Real d_HeI_0=1, Real d_HeII_0=0, Real d_HeIII_0=1, Real d_e_0=0 ) : U(U_0), d(d_0), d_HI(d_HI_0), d_HII(d_HII_0), d_HeI(d_HeI_0), 
d_HeII(d_HeII_0), d_HeIII(d_HeIII_0), d_e(d_e_0) {} - -__host__ __device__ Real get_MMW( ){ - // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII; - Real n_tot = d_HI + d_HII + 0.25 * ( d_HeI + d_HeII + d_HeIII ) + d_e; - return d / n_tot; - // return m_tot / n_tot; +void Chem_GPU::Allocate_Array_GPU_Real(Real **array_dev, int size) +{ + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); + GPU_Error_Check(); } -__host__ __device__ Real get_temperature( Real gamma ){ - Real mu, temp; - mu = get_MMW(); - temp = (gamma - 1) * mu * U * MP / KB * 1e10; - return temp; +void Chem_GPU::Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d) +{ + GPU_Error_Check(cudaMemcpy(array_d, array_h, size * sizeof(Real), cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); } -__host__ __device__ Real compute_U( Real temp, Real gamma ){ - Real mu, U_local; - mu = get_MMW(); - U_local = temp / ( gamma - 1 ) / mu / MP * KB / 1e10; - return U_local; +void Chem_GPU::Free_Array_GPU_Real(Real *array_dev) +{ + GPU_Error_Check(cudaFree(array_dev)); + GPU_Error_Check(); } +class Thermal_State +{ + public: + Real U; + Real d; + Real d_HI; + Real d_HII; + Real d_HeI; + Real d_HeII; + Real d_HeIII; + Real d_e; + + // Constructor + __host__ __device__ Thermal_State(Real U_0 = 1, Real d_0 = 1, Real d_HI_0 = 1, Real d_HII_0 = 0, Real d_HeI_0 = 1, + Real d_HeII_0 = 0, Real d_HeIII_0 = 1, Real d_e_0 = 0) + : U(U_0), d(d_0), d_HI(d_HI_0), d_HII(d_HII_0), d_HeI(d_HeI_0), d_HeII(d_HeII_0), d_HeIII(d_HeIII_0), d_e(d_e_0) + { + } + + __host__ __device__ Real get_MMW() + { + // Real m_tot = d_HI + d_HII + d_HeI + d_HeII + d_HeIII; + Real n_tot = d_HI + d_HII + 0.25 * (d_HeI + d_HeII + d_HeIII) + d_e; + return d / n_tot; + // return m_tot / n_tot; + } + + __host__ __device__ Real get_temperature(Real gamma) + { + Real mu, temp; + mu = get_MMW(); + temp = (gamma - 1) * mu * U * MP / KB * 1e10; + return temp; + } + + __host__ __device__ Real compute_U(Real temp, Real gamma) + { 
+ Real mu, U_local; + mu = get_MMW(); + U_local = temp / (gamma - 1) / mu / MP * KB / 1e10; + return U_local; + } }; -__device__ void get_temperature_indx( Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, bool print ){ - +__device__ void get_temperature_indx(Real T, Chemistry_Header &Chem_H, int &temp_indx, Real &delta_T, Real temp_old, + bool print) +{ Real logT, logT_start, d_logT, logT_l, logT_r; - logT = log( 0.5 * ( T + temp_old ) ); - logT_start = log( Chem_H.Temp_start ); - logT = fmax( logT_start, logT ); - logT = fmin( log( Chem_H.Temp_end ), logT ); - d_logT = ( log( Chem_H.Temp_end ) - logT_start ) / ( Chem_H.N_Temp_bins - 1 ); - temp_indx = (int) floor( (logT - logT_start) / d_logT ); - temp_indx = max( 0, temp_indx ); - temp_indx = min( Chem_H.N_Temp_bins-2, temp_indx ); - logT_l = logT_start + temp_indx * d_logT; - logT_r = logT_start + (temp_indx+1) * d_logT; - delta_T = ( logT - logT_l ) / ( logT_r - logT_l ); - // if (print) printf(" logT_start: %f logT_end: %f d_logT: %f \n", logT_start, log( Chem_H.Temp_end ), d_logT ); - // if (print) printf(" logT: %f logT_l: %f logT_r: %f \n", logT, logT_l, logT_r ); - -} - -__device__ Real interpolate_rate( Real *rate_table, int indx, Real delta ){ - + logT = log(0.5 * (T + temp_old)); + logT_start = log(Chem_H.Temp_start); + logT = fmax(logT_start, logT); + logT = fmin(log(Chem_H.Temp_end), logT); + d_logT = (log(Chem_H.Temp_end) - logT_start) / (Chem_H.N_Temp_bins - 1); + temp_indx = (int)floor((logT - logT_start) / d_logT); + temp_indx = max(0, temp_indx); + temp_indx = min(Chem_H.N_Temp_bins - 2, temp_indx); + logT_l = logT_start + temp_indx * d_logT; + logT_r = logT_start + (temp_indx + 1) * d_logT; + delta_T = (logT - logT_l) / (logT_r - logT_l); + // if (print) printf(" logT_start: %f logT_end: %f d_logT: %f \n", + // logT_start, log( Chem_H.Temp_end ), d_logT ); if (print) printf(" logT: %f + // logT_l: %f logT_r: %f \n", logT, logT_l, logT_r ); +} + +__device__ Real 
interpolate_rate(Real *rate_table, int indx, Real delta) +{ Real rate_val; rate_val = rate_table[indx]; - rate_val = rate_val + delta * ( rate_table[indx+1] - rate_val ); + rate_val = rate_val + delta * (rate_table[indx + 1] - rate_val); return rate_val; } -__device__ Real Get_Cooling_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z, Real temp_prev, - float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print ){ - +__device__ Real Get_Cooling_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, Real dens_number_conv, Real current_z, + Real temp_prev, float photo_h_HI, float photo_h_HeI, float photo_h_HeII, bool print) +{ int temp_indx; Real temp, delta_T, U_dot; - temp = TS.get_temperature( Chem_H.gamma ); - get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp_prev, print ); - if (print) printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), temp, temp_indx, delta_T ); + temp = TS.get_temperature(Chem_H.gamma); + get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp_prev, print); + if (print) printf("mu: %f temp: %f temp_indx: %d delta_T: %f \n", TS.get_MMW(), temp, temp_indx, delta_T); U_dot = 0.0; - + // Collisional excitation cooling - Real cool_ceHI, cool_ceHeI, cool_ceHeII; - cool_ceHI = interpolate_rate( Chem_H.cool_ceHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; - cool_ceHeI = interpolate_rate( Chem_H.cool_ceHeI_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0 ; - cool_ceHeII = interpolate_rate( Chem_H.cool_ceHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; + Real cool_ceHI, cool_ceHeI, cool_ceHeII; + cool_ceHI = interpolate_rate(Chem_H.cool_ceHI_d, temp_indx, delta_T) * TS.d_HI * TS.d_e; + cool_ceHeI = + interpolate_rate(Chem_H.cool_ceHeI_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; + cool_ceHeII = interpolate_rate(Chem_H.cool_ceHeII_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; U_dot -= cool_ceHI + 
cool_ceHeI + cool_ceHeII; - + // Collisional excitation cooling Real cool_ciHI, cool_ciHeI, cool_ciHeII, cool_ciHeIS; - cool_ciHI = interpolate_rate( Chem_H.cool_ciHI_d, temp_indx, delta_T ) * TS.d_HI * TS.d_e; - cool_ciHeI = interpolate_rate( Chem_H.cool_ciHeI_d, temp_indx, delta_T ) * TS.d_HeI * TS.d_e / 4.0; - cool_ciHeII = interpolate_rate( Chem_H.cool_ciHeII_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; - cool_ciHeIS = interpolate_rate( Chem_H.cool_ciHeIS_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; + cool_ciHI = interpolate_rate(Chem_H.cool_ciHI_d, temp_indx, delta_T) * TS.d_HI * TS.d_e; + cool_ciHeI = interpolate_rate(Chem_H.cool_ciHeI_d, temp_indx, delta_T) * TS.d_HeI * TS.d_e / 4.0; + cool_ciHeII = interpolate_rate(Chem_H.cool_ciHeII_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_ciHeIS = + interpolate_rate(Chem_H.cool_ciHeIS_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e * TS.d_e * dens_number_conv / 4.0; U_dot -= cool_ciHI + cool_ciHeI + cool_ciHeII + cool_ciHeIS; - + // Recombination cooling Real cool_reHII, cool_reHeII1, cool_reHeII2, cool_reHeIII; - cool_reHII = interpolate_rate( Chem_H.cool_reHII_d, temp_indx, delta_T ) * TS.d_HII * TS.d_e; - cool_reHeII1 = interpolate_rate( Chem_H.cool_reHeII1_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; - cool_reHeII2 = interpolate_rate( Chem_H.cool_reHeII2_d, temp_indx, delta_T ) * TS.d_HeII * TS.d_e / 4.0; - cool_reHeIII = interpolate_rate( Chem_H.cool_reHeIII_d, temp_indx, delta_T ) * TS.d_HeIII * TS.d_e / 4.0; + cool_reHII = interpolate_rate(Chem_H.cool_reHII_d, temp_indx, delta_T) * TS.d_HII * TS.d_e; + cool_reHeII1 = interpolate_rate(Chem_H.cool_reHeII_1_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_reHeII2 = interpolate_rate(Chem_H.cool_reHeII_2_d, temp_indx, delta_T) * TS.d_HeII * TS.d_e / 4.0; + cool_reHeIII = interpolate_rate(Chem_H.cool_reHeIII_d, temp_indx, delta_T) * TS.d_HeIII * TS.d_e / 4.0; U_dot -= cool_reHII + cool_reHeII1 + 
cool_reHeII2 + cool_reHeIII; - + // Bremsstrahlung cooling Real cool_brem; - cool_brem = interpolate_rate( Chem_H.cool_brem_d, temp_indx, delta_T ) * ( TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII ) * TS.d_e; + cool_brem = + interpolate_rate(Chem_H.cool_brem_d, temp_indx, delta_T) * (TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII) * TS.d_e; U_dot -= cool_brem; - + // Compton cooling or heating Real cool_compton, temp_cmb; - temp_cmb = 2.73 * ( 1.0 + current_z ); - cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * ( temp - temp_cmb ) * TS.d_e / dens_number_conv; + temp_cmb = 2.73 * (1.0 + current_z); + cool_compton = Chem_H.cool_compton * pow(1.0 + current_z, 4) * (temp - temp_cmb) * TS.d_e / dens_number_conv; U_dot -= cool_compton; - + // Phothoheating Real photo_heat; - photo_heat = ( photo_h_HI * TS.d_HI + 0.25 * ( photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII ) ) / dens_number_conv; + photo_heat = (photo_h_HI * TS.d_HI + 0.25 * (photo_h_HeI * TS.d_HeI + photo_h_HeII * TS.d_HeII)) / dens_number_conv; U_dot += photo_heat; - - if ( temp <= 1.01* Chem_H.Temp_start && fabs( U_dot ) < 0 ) U_dot = tiny; - if ( fabs(U_dot) < tiny ) U_dot = tiny; - - - if (print) printf("HI: %e \n", TS.d_HI ); - if (print) printf("HII: %e \n", TS.d_HII ); - if (print) printf("HeI: %e \n", TS.d_HeI ); - if (print) printf("HeII: %e \n", TS.d_HeII ); - if (print) printf("HeIII: %e \n", TS.d_HeIII ); - if (print) printf("de: %e \n", TS.d_e ); - if (print) printf("Cooling ceHI: %e \n", cool_ceHI ); - if (print) printf("Cooling ceHeI: %e \n", cool_ceHeI ); - if (print) printf("Cooling ceHeII: %e \n", cool_ceHeII ); - if (print) printf("Cooling ciHI: %e \n", cool_ciHI ); - if (print) printf("Cooling ciHeI: %e \n", cool_ciHeI ); - if (print) printf("Cooling ciHeII: %e \n", cool_ciHeII ); - if (print) printf("Cooling ciHeIS: %e \n", cool_ciHeIS ); - if (print) printf("Cooling reHII: %e \n", cool_reHII ); - if (print) printf("Cooling reHeII1: %e \n", cool_reHeII1 ); - if (print) printf("Cooling 
reHeII2: %e \n", cool_reHeII2 ); - if (print) printf("Cooling reHeIII: %e \n", cool_reHeIII ); - if (print) printf("Cooling brem: %e \n", cool_brem ); - if (print) printf("Cooling piHI: %e rate: %e \n", photo_h_HI, photo_h_HI * TS.d_HI / dens_number_conv ); - if (print) printf("Cooling piHeI: %e rate: %e \n", photo_h_HeI, photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25 ); - if (print) printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25); - if (print) printf("Cooling DOM: %e \n", dens_number_conv ); - if (print) printf("Cooling compton: %e \n", cool_compton ); - if (print) printf("Cooling U_dot: %e \n", U_dot ); - + + if (temp <= 1.01 * Chem_H.Temp_start && fabs(U_dot) < 0) U_dot = tiny; + if (fabs(U_dot) < tiny) U_dot = tiny; + + if (print) printf("HI: %e \n", TS.d_HI); + if (print) printf("HII: %e \n", TS.d_HII); + if (print) printf("HeI: %e \n", TS.d_HeI); + if (print) printf("HeII: %e \n", TS.d_HeII); + if (print) printf("HeIII: %e \n", TS.d_HeIII); + if (print) printf("de: %e \n", TS.d_e); + if (print) printf("Cooling ceHI: %e \n", cool_ceHI); + if (print) printf("Cooling ceHeI: %e \n", cool_ceHeI); + if (print) printf("Cooling ceHeII: %e \n", cool_ceHeII); + if (print) printf("Cooling ciHI: %e \n", cool_ciHI); + if (print) printf("Cooling ciHeI: %e \n", cool_ciHeI); + if (print) printf("Cooling ciHeII: %e \n", cool_ciHeII); + if (print) printf("Cooling ciHeIS: %e \n", cool_ciHeIS); + if (print) printf("Cooling reHII: %e \n", cool_reHII); + if (print) printf("Cooling reHeII1: %e \n", cool_reHeII1); + if (print) printf("Cooling reHeII2: %e \n", cool_reHeII2); + if (print) printf("Cooling reHeIII: %e \n", cool_reHeIII); + if (print) printf("Cooling brem: %e \n", cool_brem); + if (print) printf("Cooling piHI: %e rate: %e \n", photo_h_HI, photo_h_HI * TS.d_HI / dens_number_conv); + if (print) printf("Cooling piHeI: %e rate: %e \n", photo_h_HeI, photo_h_HeI * TS.d_HeI / dens_number_conv * 0.25); + if (print) 
printf("Cooling piHeII: %e rate: %e \n", photo_h_HeII, photo_h_HeII * TS.d_HeII / dens_number_conv * 0.25); + if (print) printf("Cooling DOM: %e \n", dens_number_conv); + if (print) printf("Cooling compton: %e \n", cool_compton); + if (print) printf("Cooling U_dot: %e \n", U_dot); return U_dot; - } -__device__ void Get_Reaction_Rates( Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI, Real &k_coll_i_HeII, - Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII, Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print ){ - +__device__ void Get_Reaction_Rates(Thermal_State &TS, Chemistry_Header &Chem_H, Real &k_coll_i_HI, Real &k_coll_i_HeI, + Real &k_coll_i_HeII, Real &k_coll_i_HI_HI, Real &k_coll_i_HI_HeI, Real &k_recomb_HII, + Real &k_recomb_HeII, Real &k_recomb_HeIII, bool print) +{ int temp_indx; Real temp, delta_T; - temp = TS.get_temperature( Chem_H.gamma ); - get_temperature_indx( temp, Chem_H, temp_indx, delta_T, temp, print ); - - k_coll_i_HI = interpolate_rate( Chem_H.k_coll_i_HI_d, temp_indx, delta_T ); - k_coll_i_HeI = interpolate_rate( Chem_H.k_coll_i_HeI_d, temp_indx, delta_T ); - k_coll_i_HeII = interpolate_rate( Chem_H.k_coll_i_HeII_d, temp_indx, delta_T ); - - k_coll_i_HI_HI = interpolate_rate( Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T ); - k_coll_i_HI_HeI = interpolate_rate( Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T ); - - k_recomb_HII = interpolate_rate( Chem_H.k_recomb_HII_d, temp_indx, delta_T ); - k_recomb_HeII = interpolate_rate( Chem_H.k_recomb_HeII_d, temp_indx, delta_T ); - k_recomb_HeIII = interpolate_rate( Chem_H.k_recomb_HeIII_d, temp_indx, delta_T ); - - if (print) printf("logT: %f temp_indx: %d\n", log(temp), temp_indx ); - if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI ); - if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI ); - if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII ); - if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI ); - if (print) 
printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI ); - if (print) printf("k_recomb_HII: %e \n", k_recomb_HII ); - if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII ); - if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII ); - + temp = TS.get_temperature(Chem_H.gamma); + get_temperature_indx(temp, Chem_H, temp_indx, delta_T, temp, print); + + k_coll_i_HI = interpolate_rate(Chem_H.k_coll_i_HI_d, temp_indx, delta_T); + k_coll_i_HeI = interpolate_rate(Chem_H.k_coll_i_HeI_d, temp_indx, delta_T); + k_coll_i_HeII = interpolate_rate(Chem_H.k_coll_i_HeII_d, temp_indx, delta_T); + + k_coll_i_HI_HI = interpolate_rate(Chem_H.k_coll_i_HI_HI_d, temp_indx, delta_T); + k_coll_i_HI_HeI = interpolate_rate(Chem_H.k_coll_i_HI_HeI_d, temp_indx, delta_T); + + k_recomb_HII = interpolate_rate(Chem_H.k_recomb_HII_d, temp_indx, delta_T); + k_recomb_HeII = interpolate_rate(Chem_H.k_recomb_HeII_d, temp_indx, delta_T); + k_recomb_HeIII = interpolate_rate(Chem_H.k_recomb_HeIII_d, temp_indx, delta_T); + + if (print) printf("logT: %f temp_indx: %d\n", log(temp), temp_indx); + if (print) printf("k_coll_i_HI: %e \n", k_coll_i_HI); + if (print) printf("k_coll_i_HeI: %e \n", k_coll_i_HeI); + if (print) printf("k_coll_i_HeII: %e \n", k_coll_i_HeII); + if (print) printf("k_coll_i_HI_HI: %e \n", k_coll_i_HI_HI); + if (print) printf("k_coll_i_HI_HeI: %e \n", k_coll_i_HI_HeI); + if (print) printf("k_recomb_HII: %e \n", k_recomb_HII); + if (print) printf("k_recomb_HeII: %e \n", k_recomb_HeII); + if (print) printf("k_recomb_HeIII: %e \n", k_recomb_HeIII); } -__device__ int Binary_Search( int N, Real val, float *data, int indx_l, int indx_r ){ +__device__ int Binary_Search(int N, Real val, float *data, int indx_l, int indx_r) +{ int n, indx; - n = indx_r - indx_l; - indx = indx_l + n/2; - if ( val >= data[N-1] ) return indx_r; - if ( val <= data[0] ) return indx_l; - if ( indx_r == indx_l + 1 ) return indx_l; - if ( data[indx] <= val ) indx_l = indx; - else indx_r = indx; - return Binary_Search( 
N, val, data, indx_l, indx_r ); + n = indx_r - indx_l; + indx = indx_l + n / 2; + if (val >= data[N - 1]) return indx_r; + if (val <= data[0]) return indx_l; + if (indx_r == indx_l + 1) return indx_l; + if (data[indx] <= val) + indx_l = indx; + else + indx_r = indx; + return Binary_Search(N, val, data, indx_l, indx_r); } -__device__ Real linear_interpolation( Real delta_x, int indx_l, int indx_r, float*array ){ - float v_l, v_r; +__device__ Real linear_interpolation(Real delta_x, int indx_l, int indx_r, float *array) +{ + float v_l, v_r; Real v; v_l = array[indx_l]; v_r = array[indx_r]; - v = delta_x * ( v_r - v_l ) + v_l; - return v; + v = delta_x * (v_r - v_l) + v_l; + return v; } -__device__ void Get_Current_UVB_Rates( Real current_z, Chemistry_Header &Chem_H, - float &photo_i_HI, float &photo_i_HeI, float &photo_i_HeII, - float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII, bool print ){ - - if ( current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]){ - photo_h_HI = 0; - photo_h_HeI = 0; - photo_h_HeII = 0; - photo_i_HI = 0; - photo_i_HeI = 0; - photo_i_HeII = 0; +__device__ void Get_Current_UVB_Rates(Real current_z, Chemistry_Header &Chem_H, float &photo_i_HI, float &photo_i_HeI, + float &photo_i_HeII, float &photo_h_HI, float &photo_h_HeI, float &photo_h_HeII, + bool print) +{ + if (current_z > Chem_H.uvb_rates_redshift_d[Chem_H.n_uvb_rates_samples - 1]) { + photo_h_HI = 0; + photo_h_HeI = 0; + photo_h_HeII = 0; + photo_i_HI = 0; + photo_i_HeI = 0; + photo_i_HeII = 0; return; - - } + } // Find closest value of z in rates_z such that z<=current_z int indx_l; Real z_l, z_r, delta_x; - indx_l = Binary_Search( Chem_H.n_uvb_rates_samples, current_z, Chem_H.uvb_rates_redshift_d, 0, Chem_H.n_uvb_rates_samples-1 ); - z_l = Chem_H.uvb_rates_redshift_d[indx_l]; - z_r = Chem_H.uvb_rates_redshift_d[indx_l+1]; - delta_x = (current_z - z_l) / ( z_r - z_l ); - - photo_i_HI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HI_rate_d 
); - photo_i_HeI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeI_rate_d ); - photo_i_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_ion_HeII_rate_d ); - photo_h_HI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HI_rate_d ); - photo_h_HeI = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeI_rate_d ); - photo_h_HeII = linear_interpolation( delta_x, indx_l, indx_l+1, Chem_H.photo_heat_HeII_rate_d ); - -} + indx_l = Binary_Search(Chem_H.n_uvb_rates_samples, current_z, Chem_H.uvb_rates_redshift_d, 0, + Chem_H.n_uvb_rates_samples - 1); + z_l = Chem_H.uvb_rates_redshift_d[indx_l]; + z_r = Chem_H.uvb_rates_redshift_d[indx_l + 1]; + delta_x = (current_z - z_l) / (z_r - z_l); + + photo_i_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HI_rate_d); + photo_i_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HeI_rate_d); + photo_i_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_ion_HeII_rate_d); + photo_h_HI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HI_rate_d); + photo_h_HeI = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HeI_rate_d); + photo_h_HeII = linear_interpolation(delta_x, indx_l, indx_l + 1, Chem_H.photo_heat_HeII_rate_d); +} + +__device__ Real Get_Chemistry_dt(Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, + Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, + Real k_coll_i_HI_HeI, Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, + float photo_i_HI, float photo_i_HeI, float photo_i_HeII, int n_iter, Real HI_dot_prev, + Real e_dot_prev, Real t_chem, Real dt_hydro, bool print) +{ + Real dt, energy; + // Rate of change of HI + HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - 
photo_i_HI * TS.d_HI; + + // Rate of change of electron + e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI / 4.0 * TS.d_e + + k_coll_i_HeII * TS.d_HeII / 4.0 * TS.d_e + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + +k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI / 4.0 - k_recomb_HII * TS.d_HII * TS.d_e - + k_recomb_HeII * TS.d_HeII / 4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII / 4.0 * TS.d_e + photo_i_HI * TS.d_HI + + photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0; -__device__ Real Get_Chemistry_dt( Thermal_State &TS, Chemistry_Header &Chem_H, Real &HI_dot, Real &e_dot, Real U_dot, - Real k_coll_i_HI, Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, - Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, - float photo_i_HI, float photo_i_HeI, float photo_i_HeII, - int n_iter, Real HI_dot_prev, Real e_dot_prev, - Real t_chem, Real dt_hydro, bool print ){ - - Real dt, energy; - // Rate of change of HI - HI_dot = k_recomb_HII * TS.d_HII * TS.d_e - k_coll_i_HI * TS.d_HI * TS.d_e - - k_coll_i_HI_HI * TS.d_HI * TS.d_HI - k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 - - photo_i_HI * TS.d_HI; - - // Rate of change of electron - e_dot = k_coll_i_HI * TS.d_HI * TS.d_e + k_coll_i_HeI * TS.d_HeI/4.0 * TS.d_e + k_coll_i_HeII * TS.d_HeII/4.0 * TS.d_e - + k_coll_i_HI_HI * TS.d_HI * TS.d_HI + + k_coll_i_HI_HeI * TS.d_HI * TS.d_HeI/4.0 - - k_recomb_HII * TS.d_HII * TS.d_e - k_recomb_HeII * TS.d_HeII/4.0 * TS.d_e - k_recomb_HeIII * TS.d_HeIII/4.0 * TS.d_e - + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0; - // Bound from below to prevent numerical errors - if ( fabs(HI_dot) < tiny ) HI_dot = fmin( tiny, TS.d_HI ); - if ( fabs(e_dot) < tiny ) e_dot = fmin( tiny, TS.d_e ); - + if (fabs(HI_dot) < tiny) HI_dot = fmin(tiny, TS.d_HI); + if (fabs(e_dot) < tiny) e_dot = fmin(tiny, TS.d_e); + // If the net rate is almost perfectly balanced then set // it to zero (since it is zero to available precision) - if ( 
fmin( fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e) ) / fmax( fabs(HI_dot), fabs(e_dot) ) > 1e6 ){ + if (fmin(fabs(k_coll_i_HI * TS.d_HI * TS.d_e), fabs(k_recomb_HII * TS.d_HII * TS.d_e)) / + fmax(fabs(HI_dot), fabs(e_dot)) > + 1e6) { HI_dot = tiny; e_dot = tiny; } - - if ( n_iter > 50 ){ - HI_dot = fmin( fabs(HI_dot), fabs( HI_dot_prev) ); - e_dot = fmin( fabs(e_dot), fabs( e_dot_prev) ); + + if (n_iter > 50) { + HI_dot = fmin(fabs(HI_dot), fabs(HI_dot_prev)); + e_dot = fmin(fabs(e_dot), fabs(e_dot_prev)); } - - if ( TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0 ){ - printf( "#### Equlibrium \n" ); + + if (TS.d * Chem_H.dens_number_conv > 1e8 && U_dot > 0) { + printf("#### Equlibrium \n"); } - + #ifdef TEMPERATURE_FLOOR - if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); #endif - - energy = fmax( TS.U * TS.d, tiny ); - dt = fmin( fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ) ); - dt = fmin( fabs( 0.1 * energy / U_dot ), dt ); - dt = fmin( 0.5 * dt_hydro, dt ); - dt = fmin( dt_hydro - t_chem, dt ); - - if ( n_iter == Chem_H.max_iter-1 ){ - printf("##### Chem_GPU: dt_hydro: %e t_chem: %e dens: %e temp: %e GE: %e U_dot: %e dt_HI: %e dt_e: %e dt_U: %e \n", dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), energy, U_dot, fabs( 0.1 * TS.d_HI / HI_dot ), fabs( 0.1 * TS.d_e / e_dot ), fabs( 0.1 * TS.U * TS.d / U_dot ) ) ; + + energy = fmax(TS.U * TS.d, tiny); + dt = fmin(fabs(0.1 * TS.d_HI / HI_dot), fabs(0.1 * TS.d_e / e_dot)); + dt = fmin(fabs(0.1 * energy / U_dot), dt); + dt = fmin(0.5 * dt_hydro, dt); + dt = fmin(dt_hydro - t_chem, dt); + + if (n_iter == Chem_H.max_iter - 1) { + printf( + "##### Chem_GPU: dt_hydro: %e t_chem: %e dens: %e temp: %e GE: " + "%e U_dot: %e dt_HI: %e dt_e: %e dt_U: %e \n", + dt_hydro, t_chem, TS.d, TS.get_temperature(Chem_H.gamma), 
energy, U_dot, fabs(0.1 * TS.d_HI / HI_dot), + fabs(0.1 * TS.d_e / e_dot), fabs(0.1 * TS.U * TS.d / U_dot)); } - - - if (print) printf("HIdot: %e\n", HI_dot ); - if (print) printf("edot: %e\n", e_dot ); - if (print) printf("energy: %e\n", TS.U * TS.d ); - if (print) printf("Udot: %e\n", U_dot ); - if (print) printf("dt_hydro: %e\n", dt_hydro ); - if (print) printf("dt: %e\n", dt ); - - return dt; - + + if (print) printf("HIdot: %e\n", HI_dot); + if (print) printf("edot: %e\n", e_dot); + if (print) printf("energy: %e\n", TS.U * TS.d); + if (print) printf("Udot: %e\n", U_dot); + if (print) printf("dt_hydro: %e\n", dt_hydro); + if (print) printf("dt: %e\n", dt); + + return dt; } -__device__ void Update_Step( Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, Real k_coll_i_HeI, - Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, - Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, - float photo_i_HI, float photo_i_HeI, float photo_i_HeII, - Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev, bool print ){ - - Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p; +__device__ void Update_Step(Thermal_State &TS, Chemistry_Header &Chem_H, Real dt, Real U_dot, Real k_coll_i_HI, + Real k_coll_i_HeI, Real k_coll_i_HeII, Real k_coll_i_HI_HI, Real k_coll_i_HI_HeI, + Real k_recomb_HII, Real k_recomb_HeII, Real k_recomb_HeIII, float photo_i_HI, + float photo_i_HeI, float photo_i_HeII, Real &HI_dot_prev, Real &e_dot_prev, Real &temp_prev, + bool print) +{ + Real d_HI_p, d_HII_p, d_HeI_p, d_HeII_p, d_HeIII_p, d_e_p; Real s_coef, a_coef; - + // Update HI s_coef = k_recomb_HII * TS.d_HII * TS.d_e; - a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + k_coll_i_HI_HeI * TS.d_HeI/4.0 + photo_i_HI; - d_HI_p = ( dt * s_coef + TS.d_HI ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, d_HI_p ); - + a_coef = k_coll_i_HI * TS.d_e + k_coll_i_HI_HI * TS.d_HI + 
k_coll_i_HI_HeI * TS.d_HeI / 4.0 + photo_i_HI; + d_HI_p = (dt * s_coef + TS.d_HI) / (1.0 + dt * a_coef); + if (print) printf("Update HI s_coef: %e a_coef: %e HIp: %e \n", s_coef, a_coef, d_HI_p); + // Update HII - s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 + photo_i_HI * d_HI_p; - a_coef = k_recomb_HII * TS.d_e; - d_HII_p = ( dt * s_coef + TS.d_HII ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, d_HII_p ); - + s_coef = k_coll_i_HI * d_HI_p * TS.d_e + k_coll_i_HI_HI * d_HI_p * d_HI_p + + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * d_HI_p; + a_coef = k_recomb_HII * TS.d_e; + d_HII_p = (dt * s_coef + TS.d_HII) / (1.0 + dt * a_coef); + if (print) printf("Update HII s_coef: %e a_coef: %e HIIp: %e \n", s_coef, a_coef, d_HII_p); + // Update electron - s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI/4.0 - + photo_i_HI * TS.d_HI + photo_i_HeI * TS.d_HeI/4.0 + photo_i_HeII * TS.d_HeII/4.0 ; - a_coef = - k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI/4.0 + k_recomb_HeII * TS.d_HeII/4.0 - - k_coll_i_HeII * TS.d_HeII/4.0 + k_recomb_HeIII * TS.d_HeIII/4.0; - d_e_p = ( dt * s_coef + TS.d_e ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, d_e_p ); - + s_coef = k_coll_i_HI_HI * d_HI_p * d_HI_p + k_coll_i_HI_HeI * d_HI_p * TS.d_HeI / 4.0 + photo_i_HI * TS.d_HI + + photo_i_HeI * TS.d_HeI / 4.0 + photo_i_HeII * TS.d_HeII / 4.0; + a_coef = -k_coll_i_HI * TS.d_HI + k_recomb_HII * TS.d_HII - k_coll_i_HeI * TS.d_HeI / 4.0 + + k_recomb_HeII * TS.d_HeII / 4.0 - k_coll_i_HeII * TS.d_HeII / 4.0 + k_recomb_HeIII * TS.d_HeIII / 4.0; + d_e_p = (dt * s_coef + TS.d_e) / (1.0 + dt * a_coef); + if (print) printf("Update e s_coef: %e a_coef: %e ep: %e \n", s_coef, a_coef, d_e_p); + // Update HeI - s_coef = k_recomb_HeII * TS.d_HeII * 
TS.d_e; - a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI; - d_HeI_p = ( dt * s_coef + TS.d_HeI ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, d_HeI_p ); - + s_coef = k_recomb_HeII * TS.d_HeII * TS.d_e; + a_coef = k_coll_i_HeI * TS.d_e + photo_i_HeI; + d_HeI_p = (dt * s_coef + TS.d_HeI) / (1.0 + dt * a_coef); + if (print) printf("Update HeI s_coef: %e a_coef: %e HeIp: %e \n", s_coef, a_coef, d_HeI_p); + // Update HeII - s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; - a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII; - d_HeII_p = ( dt * s_coef + TS.d_HeII ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, a_coef, d_HeII_p ); - + s_coef = k_coll_i_HeI * d_HeI_p * TS.d_e + k_recomb_HeIII * TS.d_HeIII * TS.d_e + photo_i_HeI * d_HeI_p; + a_coef = k_recomb_HeII * TS.d_e + k_coll_i_HeII * TS.d_e + photo_i_HeII; + d_HeII_p = (dt * s_coef + TS.d_HeII) / (1.0 + dt * a_coef); + if (print) printf("Update HeII s_coef: %e a_coef: %e HeIIp: %e \n", s_coef, a_coef, d_HeII_p); + // Update HeIII - s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p; - a_coef = k_recomb_HeIII * TS.d_e; - d_HeIII_p = ( dt * s_coef + TS.d_HeIII ) / ( 1.0 + dt*a_coef ); - if ( print ) printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p ); - + s_coef = k_coll_i_HeII * d_HeII_p * TS.d_e + photo_i_HeII * d_HeII_p; + a_coef = k_recomb_HeIII * TS.d_e; + d_HeIII_p = (dt * s_coef + TS.d_HeIII) / (1.0 + dt * a_coef); + if (print) printf("Update HeIII s_coef: %e a_coef: %e HeIIIp: %e \n", s_coef, a_coef, d_HeIII_p); + // Record the temperature for the next step - temp_prev = TS.get_temperature( Chem_H.gamma ); - - HI_dot_prev = fabs( TS.d_HI - d_HI_p ) / fmax( dt, tiny ); - TS.d_HI = fmax( d_HI_p, tiny ); - TS.d_HII = fmax( d_HII_p, tiny ); - TS.d_HeI = fmax( d_HeI_p, 
tiny ); - TS.d_HeII = fmax( d_HeII_p, tiny ); - TS.d_HeIII = fmax( d_HeIII_p, 1e-5*tiny ); - + temp_prev = TS.get_temperature(Chem_H.gamma); + + HI_dot_prev = fabs(TS.d_HI - d_HI_p) / fmax(dt, tiny); + TS.d_HI = fmax(d_HI_p, tiny); + TS.d_HII = fmax(d_HII_p, tiny); + TS.d_HeI = fmax(d_HeI_p, tiny); + TS.d_HeII = fmax(d_HeII_p, tiny); + TS.d_HeIII = fmax(d_HeIII_p, 1e-5 * tiny); + // Use charge conservation to determine electron fraction e_dot_prev = TS.d_e; - TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0; - e_dot_prev = fabs( TS.d_e - e_dot_prev ) / fmax( dt, tiny ); - + TS.d_e = TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII / 2.0; + e_dot_prev = fabs(TS.d_e - e_dot_prev) / fmax(dt, tiny); + // Update internal energy TS.U += U_dot / TS.d * dt; #ifdef TEMPERATURE_FLOOR - if ( TS.get_temperature( Chem_H.gamma ) < TEMP_FLOOR ) TS.U = TS.compute_U( TEMP_FLOOR, Chem_H.gamma ); + if (TS.get_temperature(Chem_H.gamma) < TEMP_FLOOR) TS.U = TS.compute_U(TEMP_FLOOR, Chem_H.gamma); #endif - if ( print ) printf("Updated U: %e \n", TS.U); - - - } - + if (print) printf("Updated U: %e \n", TS.U); +} -__global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt_hydro, Chemistry_Header Chem_H ){ - - +__global__ void Update_Chemistry_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real dt_hydro, Chemistry_Header Chem_H) +{ int id, xid, yid, zid, n_cells, n_iter; Real d, d_inv, vx, vy, vz; Real GE, E_kin, dt_chem, t_chem; Real current_a, a3, a2; - + Real current_z, density_conv, energy_conv; - current_z = Chem_H.current_z; + current_z = Chem_H.current_z; density_conv = Chem_H.density_conversion; energy_conv = Chem_H.energy_conversion; - + Real U_dot, HI_dot, e_dot, HI_dot_prev, e_dot_prev, temp_prev; Real k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI; Real k_recomb_HII, k_recomb_HeII, k_recomb_HeIII; float photo_i_HI, photo_i_HeI, photo_i_HeII; float photo_h_HI, 
photo_h_HeI, photo_h_HeII; Real correct_H, correct_He; - - - n_cells = nx*ny*nz; - + + n_cells = nx * ny * nz; + // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; bool print; - + // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E_kin = 0.5*d*(vx*vx + vy*vy + vz*vz); - #ifdef DE - GE = dev_conserved[(n_fields-1)*n_cells + id]; - #else - GE = dev_conserved[4*n_cells + id] - E_kin; - #endif - + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + #ifdef DE + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + #else + GE = dev_conserved[4 * n_cells + id] - E_kin; + #endif + print = false; // if ( xid == n_ghost && yid == n_ghost && zid == n_ghost ) print = true; - + // Convert to cgs units - current_a = 1 / ( current_z + 1); - a2 = current_a * current_a; - a3 = a2 * current_a; - d *= density_conv / a3; - GE *= energy_conv / a2; - dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * 1000 * KPC / Chem_H.time_units; - // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * dt_hydro * Chem_H.time_units; - - // Initialize the thermal state - Thermal_State 
TS; - TS.d = dev_conserved[ id] / a3; - TS.d_HI = dev_conserved[ 5*n_cells + id] / a3; - TS.d_HII = dev_conserved[ 6*n_cells + id] / a3; - TS.d_HeI = dev_conserved[ 7*n_cells + id] / a3; - TS.d_HeII = dev_conserved[ 8*n_cells + id] / a3; - TS.d_HeIII = dev_conserved[ 9*n_cells + id] / a3; - TS.d_e = dev_conserved[10*n_cells + id] / a3; - TS.U = GE * d_inv * 1e-10; - + current_a = 1 / (current_z + 1); + a2 = current_a * current_a; + a3 = a2 * current_a; + d *= density_conv / a3; + GE *= energy_conv / a2; + dt_hydro = dt_hydro / Chem_H.time_units; + + #ifdef COSMOLOGY + dt_hydro *= current_a * current_a / Chem_H.H0 * 1000 * + KPC + #endif // COSMOLOGY + // dt_hydro = dt_hydro * current_a * current_a / Chem_H.H0 * + // 1000 * KPC / Chem_H.time_units; + // delta_a = Chem_H.H0 * sqrt( Chem_H.Omega_M/current_a + + // Chem_H.Omega_L*pow(current_a, 2) ) / ( 1000 * KPC ) * + // dt_hydro * Chem_H.time_units; + + // Initialize the thermal state + Thermal_State TS; + TS.d = dev_conserved[id] / a3; + TS.d_HI = dev_conserved[id + n_cells * grid_enum::HI_density] / a3; + TS.d_HII = dev_conserved[id + n_cells * grid_enum::HII_density] / a3; + TS.d_HeI = dev_conserved[id + n_cells * grid_enum::HeI_density] / a3; + TS.d_HeII = dev_conserved[id + n_cells * grid_enum::HeII_density] / a3; + TS.d_HeIII = dev_conserved[id + n_cells * grid_enum::HeIII_density] / a3; + TS.d_e = dev_conserved[id + n_cells * grid_enum::e_density] / a3; + TS.U = GE * d_inv * 1e-10; + // Ceiling species - TS.d_HI = fmax( TS.d_HI, tiny ); - TS.d_HII = fmax( TS.d_HII, tiny ); - TS.d_HeI = fmax( TS.d_HeI, tiny ); - TS.d_HeII = fmax( TS.d_HeII, tiny ); - TS.d_HeIII = fmax( TS.d_HeIII, 1e-5*tiny ); - TS.d_e = fmax( TS.d_e, tiny ); - + TS.d_HI = fmax(TS.d_HI, tiny); + TS.d_HII = fmax(TS.d_HII, tiny); + TS.d_HeI = fmax(TS.d_HeI, tiny); + TS.d_HeII = fmax(TS.d_HeII, tiny); + TS.d_HeIII = fmax(TS.d_HeIII, 1e-5 * tiny); + TS.d_e = fmax(TS.d_e, tiny); + // Compute temperature at first iteration - temp_prev = 
TS.get_temperature( Chem_H.gamma ); - + temp_prev = TS.get_temperature(Chem_H.gamma); + // if (print){ // printf("current_z: %f\n", current_z ); // printf("density_units: %e\n", Chem_H.density_units ); @@ -510,551 +531,486 @@ __global__ void Update_Chemistry_kernel( Real *dev_conserved, int nx, int ny, in // printf("energy: %e \n", TS.U*TS.d ); // printf("dt_hydro: %e \n", dt_hydro / Chem_H.time_units ); // } - + // Get the photoheating and photoionization rates at z=current_z - Get_Current_UVB_Rates( current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, - photo_h_HI, photo_h_HeI, photo_h_HeII, print ); - + Get_Current_UVB_Rates(current_z, Chem_H, photo_i_HI, photo_i_HeI, photo_i_HeII, photo_h_HI, photo_h_HeI, + photo_h_HeII, print); + HI_dot_prev = 0; - e_dot_prev = 0; - n_iter = 0; - t_chem = 0; - while ( t_chem < dt_hydro ){ - - if (print) printf("########################################## Iter %d \n", n_iter ); - - U_dot = Get_Cooling_Rates( TS, Chem_H, Chem_H.dens_number_conv, current_z, temp_prev, - photo_h_HI, photo_h_HeI, photo_h_HeII, print ); - - Get_Reaction_Rates( TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, - k_coll_i_HI_HI, k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print ); - - dt_chem = Get_Chemistry_dt( TS, Chem_H, HI_dot, e_dot, U_dot, - k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, - k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, - photo_i_HI, photo_i_HeI, photo_i_HeII, - n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print ); - - Update_Step( TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, - k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, - e_dot_prev, temp_prev, print ); - + e_dot_prev = 0; + n_iter = 0; + t_chem = 0; + while (t_chem < dt_hydro) { + if (print) printf("########################################## Iter %d \n", n_iter); + + U_dot = Get_Cooling_Rates(TS, Chem_H, 
Chem_H.dens_number_conv, current_z, temp_prev, photo_h_HI, photo_h_HeI, + photo_h_HeII, print); + + Get_Reaction_Rates(TS, Chem_H, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, + k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, print); + + dt_chem = + Get_Chemistry_dt(TS, Chem_H, HI_dot, e_dot, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, + k_coll_i_HI_HeI, k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, + photo_i_HeII, n_iter, HI_dot_prev, e_dot_prev, t_chem, dt_hydro, print); + + Update_Step(TS, Chem_H, dt_chem, U_dot, k_coll_i_HI, k_coll_i_HeI, k_coll_i_HeII, k_coll_i_HI_HI, k_coll_i_HI_HeI, + k_recomb_HII, k_recomb_HeII, k_recomb_HeIII, photo_i_HI, photo_i_HeI, photo_i_HeII, HI_dot_prev, + e_dot_prev, temp_prev, print); + t_chem += dt_chem; n_iter += 1; - if ( n_iter == Chem_H.max_iter ) break; - + if (n_iter == Chem_H.max_iter) break; } - if ( print ) printf("Chem_GPU: N Iter: %d\n", n_iter ); - + if (print) printf("Chem_GPU: N Iter: %d\n", n_iter); + // Make consistent abundances with the H and He density - correct_H = Chem_H.H_fraction * TS.d / ( TS.d_HI + TS.d_HII ); - correct_He = ( 1.0 - Chem_H.H_fraction ) * TS.d / ( TS.d_HeI + TS.d_HeII + TS.d_HeIII ); - TS.d_HI *= correct_H; - TS.d_HII *= correct_H; - TS.d_HeI *= correct_He; - TS.d_HeII *= correct_He; + correct_H = Chem_H.H_fraction * TS.d / (TS.d_HI + TS.d_HII); + correct_He = (1.0 - Chem_H.H_fraction) * TS.d / (TS.d_HeI + TS.d_HeII + TS.d_HeIII); + TS.d_HI *= correct_H; + TS.d_HII *= correct_H; + TS.d_HeI *= correct_He; + TS.d_HeII *= correct_He; TS.d_HeIII *= correct_He; - + // Use charge conservation to determine electron fractioan - TS.d_e = TS.d_HII + TS.d_HeII/4.0 + TS.d_HeIII/2.0; - + TS.d_e = TS.d_HII + TS.d_HeII / 4.0 + TS.d_HeIII / 2.0; + // Write the Updated Thermal State - dev_conserved[ 5*n_cells + id] = TS.d_HI * a3; - dev_conserved[ 6*n_cells + id] = TS.d_HII * a3; - dev_conserved[ 7*n_cells + id] = TS.d_HeI * a3; - 
dev_conserved[ 8*n_cells + id] = TS.d_HeII * a3; - dev_conserved[ 9*n_cells + id] = TS.d_HeIII * a3; - dev_conserved[10*n_cells + id] = TS.d_e * a3; - d = d / density_conv * a3; - GE = TS.U / d_inv / energy_conv * a2 / 1e-10; - dev_conserved[4*n_cells + id] = GE + E_kin; - #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] = GE; - #endif - - if ( print ) printf("###########################################\n" ); - if ( print ) printf("Updated HI: %e\n", TS.d_HI * a3 ); - if ( print ) printf("Updated HII: %e\n", TS.d_HII * a3 ); - if ( print ) printf("Updated HeI: %e\n", TS.d_HeI * a3 ); - if ( print ) printf("Updated HeII: %e\n", TS.d_HeII * a3 ); - if ( print ) printf("Updated HeIII: %e\n", TS.d_HeIII * a3 ); - if ( print ) printf("Updated e: %e\n", TS.d_e * a3 ); - if ( print ) printf("Updated GE: %e\n", dev_conserved[(n_fields-1)*n_cells + id] ); - if ( print ) printf("Updated E: %e\n", dev_conserved[4*n_cells + id] ); - + dev_conserved[id + n_cells * grid_enum::HI_density] = TS.d_HI * a3; + dev_conserved[id + n_cells * grid_enum::HII_density] = TS.d_HII * a3; + dev_conserved[id + n_cells * grid_enum::HeI_density] = TS.d_HeI * a3; + dev_conserved[id + n_cells * grid_enum::HeII_density] = TS.d_HeII * a3; + dev_conserved[id + n_cells * grid_enum::HeIII_density] = TS.d_HeIII * a3; + dev_conserved[id + n_cells * grid_enum::e_density] = TS.d_e * a3; + d = d / density_conv * a3; + GE = TS.U / d_inv / energy_conv * a2 / 1e-10; + dev_conserved[4 * n_cells + id] = GE + E_kin; + #ifdef DE + dev_conserved[(n_fields - 1) * n_cells + id] = GE; + #endif + + if (print) printf("###########################################\n"); + if (print) printf("Updated HI: %e\n", TS.d_HI * a3); + if (print) printf("Updated HII: %e\n", TS.d_HII * a3); + if (print) printf("Updated HeI: %e\n", TS.d_HeI * a3); + if (print) printf("Updated HeII: %e\n", TS.d_HeII * a3); + if (print) printf("Updated HeIII: %e\n", TS.d_HeIII * a3); + if (print) printf("Updated e: %e\n", TS.d_e * a3); + if (print) 
printf("Updated GE: %e\n", dev_conserved[(n_fields - 1) * n_cells + id]); + if (print) printf("Updated E: %e\n", dev_conserved[4 * n_cells + id]); } } -void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H){ - +void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Chemistry_Header &Chem_H) +{ float time; cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - - int ngrid = (nx*ny*nz - 1) / TPB_CHEM + 1; + + int ngrid = (nx * ny * nz - 1) / TPB_CHEM + 1; dim3 dim1dGrid(ngrid, 1, 1); - dim3 dim1dBlock(TPB_CHEM, 1, 1); - hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, Chem_H ); - - CudaCheckError(); + dim3 dim1dBlock(TPB_CHEM, 1, 1); + hipLaunchKernelGGL(Update_Chemistry_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + dt, Chem_H); + + GPU_Error_Check(); cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); - Chem_H.runtime_chemistry_step = (Real) time/1000; // (Convert ms to secs ) - + Chem_H.runtime_chemistry_step = (Real)time / 1000; // (Convert ms to secs ) } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Reaction and cooling rates from Grackle - -//Kelvin to eV conversion factor -#ifndef tevk -#define tevk 1.1605e4 -#endif -//Comparison value -#ifndef dhuge -#define dhuge 1.0e30 -#endif -//Small value -#ifndef tiny -#define tiny 1.0e-20 -#endif -// Boltzmann's constant -#ifndef kboltz -#define kboltz 
1.3806504e-16 //Boltzmann's constant [cm2gs-2K-1] or [ergK-1] -#endif + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Reaction and cooling rates from Grackle + // Kelvin to eV conversion factor + #ifndef tevk + #define tevk 1.1605e4 + #endif + // Comparison value + #ifndef dhuge + #define dhuge 1.0e30 + #endif + // Small value + #ifndef tiny + #define tiny 1.0e-20 + #endif + // Boltzmann's constant + #ifndef kboltz + #define kboltz 1.3806504e-16 // Boltzmann's constant [cm2gs-2K-1] or [ergK-1] + #endif // Calculation of k1 (HI + e --> HII + 2e) // k1_rate -__device__ Real coll_i_HI_rate( Real T, Real units ) -{ - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - - Real k1 = exp( -32.71396786375 - + 13.53655609057*logT_ev - - 5.739328757388*pow(logT_ev, 2) - + 1.563154982022*pow(logT_ev, 3) - - 0.2877056004391*pow(logT_ev, 4) - + 0.03482559773736999*pow(logT_ev, 5) - - 0.00263197617559*pow(logT_ev, 6) - + 0.0001119543953861*pow(logT_ev, 7) - - 2.039149852002e-6*pow(logT_ev, 8)) / units; - if (T_ev <= 0.8){ - k1 = fmax(tiny, k1); - } - return k1; +__device__ Real coll_i_HI_rate(Real T, Real units) +{ + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + + Real k1 = exp(-32.71396786375 + 13.53655609057 * logT_ev - 5.739328757388 * pow(logT_ev, 2) + + 1.563154982022 * pow(logT_ev, 3) - 0.2877056004391 * pow(logT_ev, 4) + + 0.03482559773736999 * pow(logT_ev, 5) - 0.00263197617559 * pow(logT_ev, 6) + + 0.0001119543953861 * pow(logT_ev, 7) - 2.039149852002e-6 * pow(logT_ev, 8)) / + units; + if (T_ev <= 0.8) { + k1 = fmax(tiny, k1); + } + return k1; } -//Calculation of k3 (HeI + e --> HeII + 2e) -// k3_rate -__device__ Real 
coll_i_HeI_rate( Real T, Real units ) +// Calculation of k3 (HeI + e --> HeII + 2e) +// k3_rate +__device__ Real coll_i_HeI_rate(Real T, Real units) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); - if (T_ev > 0.8){ - return exp( -44.09864886561001 - + 23.91596563469*logT_ev - - 10.75323019821*pow(logT_ev, 2) - + 3.058038757198*pow(logT_ev, 3) - - 0.5685118909884001*pow(logT_ev, 4) - + 0.06795391233790001*pow(logT_ev, 5) - - 0.005009056101857001*pow(logT_ev, 6) - + 0.0002067236157507*pow(logT_ev, 7) - - 3.649161410833e-6*pow(logT_ev, 8)) / units; - } else { - return tiny; - } -} + if (T_ev > 0.8) { + return exp(-44.09864886561001 + 23.91596563469 * logT_ev - 10.75323019821 * pow(logT_ev, 2) + + 3.058038757198 * pow(logT_ev, 3) - 0.5685118909884001 * pow(logT_ev, 4) + + 0.06795391233790001 * pow(logT_ev, 5) - 0.005009056101857001 * pow(logT_ev, 6) + + 0.0002067236157507 * pow(logT_ev, 7) - 3.649161410833e-6 * pow(logT_ev, 8)) / + units; + } else { + return tiny; + } +} -//Calculation of k4 (HeII + e --> HeI + photon) -// k4_rate -__device__ Real recomb_HeII_rate( Real T, Real units, bool use_case_B ) +// Calculation of k4 (HeII + e --> HeI + photon) +// k4_rate +__device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - //If case B recombination on. - if (use_case_B){ - return 1.26e-14 * pow(5.7067e5/T, 0.75) / units; - } + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + // If case B recombination on. + if (use_case_B) { + return 1.26e-14 * pow(5.7067e5 / T, 0.75) / units; + } - //If case B recombination off. - if (T_ev > 0.8){ - return (1.54e-9*(1.0 + 0.3 / exp(8.099328789667/T_ev)) - / (exp(40.49664394833662/T_ev)*pow(T_ev, 1.5)) - + 3.92e-13/pow(T_ev, 0.6353)) / units; - } else { - return 3.92e-13/pow(T_ev, 0.6353) / units; - } + // If case B recombination off. 
+ if (T_ev > 0.8) { + return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + + 3.92e-13 / pow(T_ev, 0.6353)) / + units; + } else { + return 3.92e-13 / pow(T_ev, 0.6353) / units; + } } // k4_rate Case A -__device__ Real recomb_HeII_rate_case_A( Real T, Real units ) -{ - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); - if (T_ev > 0.8){ - return (1.54e-9*(1.0 + 0.3 / exp(8.099328789667/T_ev)) - / (exp(40.49664394833662/T_ev)*pow(T_ev, 1.5)) - + 3.92e-13/pow(T_ev, 0.6353)) / units; - } else { - return 3.92e-13/pow(T_ev, 0.6353) / units; - } +__device__ Real recomb_HeII_rate_case_A(Real T, Real units) +{ + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); + if (T_ev > 0.8) { + return (1.54e-9 * (1.0 + 0.3 / exp(8.099328789667 / T_ev)) / (exp(40.49664394833662 / T_ev) * pow(T_ev, 1.5)) + + 3.92e-13 / pow(T_ev, 0.6353)) / + units; + } else { + return 3.92e-13 / pow(T_ev, 0.6353) / units; + } } // k4_rate Case B -__device__ Real recomb_HeII_rate_case_B( Real T, Real units ) +__device__ Real recomb_HeII_rate_case_B(Real T, Real units) { - //If case B recombination on. - return 1.26e-14 * pow(5.7067e5/T, 0.75) / units; + // If case B recombination on. + return 1.26e-14 * pow(5.7067e5 / T, 0.75) / units; } - -//Calculation of k2 (HII + e --> HI + photon) -// k2_rate -__device__ Real recomb_HII_rate( Real T, Real units, bool use_case_B ) +// Calculation of k2 (HII + e --> HI + photon) +// k2_rate +__device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B) { - if (use_case_B) { - if (T < 1.0e9) { - return 4.881357e-6*pow(T, -1.5) \ - * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units; - } else { - return tiny; - } + if (use_case_B) { + if (T < 1.0e9) { + return 4.881357e-6 * pow(T, -1.5) * pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; } else { - if (T > 5500) { - //Convert temperature to appropriate form. 
- Real T_ev = T / tevk; - Real logT_ev = log(T_ev); - - return exp( -28.61303380689232 \ - - 0.7241125657826851*logT_ev \ - - 0.02026044731984691*pow(logT_ev, 2) \ - - 0.002380861877349834*pow(logT_ev, 3) \ - - 0.0003212605213188796*pow(logT_ev, 4) \ - - 0.00001421502914054107*pow(logT_ev, 5) \ - + 4.989108920299513e-6*pow(logT_ev, 6) \ - + 5.755614137575758e-7*pow(logT_ev, 7) \ - - 1.856767039775261e-8*pow(logT_ev, 8) \ - - 3.071135243196595e-9*pow(logT_ev, 9)) / units; - } else { - return recomb_HeII_rate(T, units, use_case_B); - } + return tiny; } -} -// k2_rate Case A -__device__ Real recomb_HII_rate_case_A( Real T, Real units ) -{ + } else { if (T > 5500) { - //Convert temperature to appropriate form. - Real T_ev = T / tevk; - Real logT_ev = log(T_ev); - - return exp( -28.61303380689232 \ - - 0.7241125657826851*logT_ev \ - - 0.02026044731984691*pow(logT_ev, 2) \ - - 0.002380861877349834*pow(logT_ev, 3) \ - - 0.0003212605213188796*pow(logT_ev, 4) \ - - 0.00001421502914054107*pow(logT_ev, 5) \ - + 4.989108920299513e-6*pow(logT_ev, 6) \ - + 5.755614137575758e-7*pow(logT_ev, 7) \ - - 1.856767039775261e-8*pow(logT_ev, 8) \ - - 3.071135243196595e-9*pow(logT_ev, 9)) / units; + // Convert temperature to appropriate form. + Real T_ev = T / tevk; + Real logT_ev = log(T_ev); + + return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - 0.02026044731984691 * pow(logT_ev, 2) - + 0.002380861877349834 * pow(logT_ev, 3) - 0.0003212605213188796 * pow(logT_ev, 4) - + 0.00001421502914054107 * pow(logT_ev, 5) + 4.989108920299513e-6 * pow(logT_ev, 6) + + 5.755614137575758e-7 * pow(logT_ev, 7) - 1.856767039775261e-8 * pow(logT_ev, 8) - + 3.071135243196595e-9 * pow(logT_ev, 9)) / + units; } else { - return recomb_HeII_rate_case_A(T, units ); + return recomb_HeII_rate(T, units, use_case_B); } + } +} +// k2_rate Case A +__device__ Real recomb_HII_rate_case_A(Real T, Real units) +{ + if (T > 5500) { + // Convert temperature to appropriate form. 
+ Real T_ev = T / tevk; + Real logT_ev = log(T_ev); + + return exp(-28.61303380689232 - 0.7241125657826851 * logT_ev - 0.02026044731984691 * pow(logT_ev, 2) - + 0.002380861877349834 * pow(logT_ev, 3) - 0.0003212605213188796 * pow(logT_ev, 4) - + 0.00001421502914054107 * pow(logT_ev, 5) + 4.989108920299513e-6 * pow(logT_ev, 6) + + 5.755614137575758e-7 * pow(logT_ev, 7) - 1.856767039775261e-8 * pow(logT_ev, 8) - + 3.071135243196595e-9 * pow(logT_ev, 9)) / + units; + } else { + return recomb_HeII_rate_case_A(T, units); + } } // k2_rate Case B -__device__ Real recomb_HII_rate_case_B( Real T, Real units ) +__device__ Real recomb_HII_rate_case_B(Real T, Real units) { - if (T < 1.0e9) { - return 4.881357e-6*pow(T, -1.5) \ - * pow((1.0 + 1.14813e2*pow(T, -0.407)), -2.242) / units; - } else { - return tiny; - } + if (T < 1.0e9) { + return 4.881357e-6 * pow(T, -1.5) * pow((1.0 + 1.14813e2 * pow(T, -0.407)), -2.242) / units; + } else { + return tiny; + } } - -//Calculation of k5 (HeII + e --> HeIII + 2e) -// k5_rate -__device__ Real coll_i_HeII_rate( Real T, Real units ) +// Calculation of k5 (HeII + e --> HeIII + 2e) +// k5_rate +__device__ Real coll_i_HeII_rate(Real T, Real units) { - Real T_ev = T / 11605.0; - Real logT_ev = log(T_ev); + Real T_ev = T / 11605.0; + Real logT_ev = log(T_ev); - Real k5; - if (T_ev > 0.8){ - k5 = exp(-68.71040990212001 - + 43.93347632635*logT_ev - - 18.48066993568*pow(logT_ev, 2) - + 4.701626486759002*pow(logT_ev, 3) - - 0.7692466334492*pow(logT_ev, 4) - + 0.08113042097303*pow(logT_ev, 5) - - 0.005324020628287001*pow(logT_ev, 6) - + 0.0001975705312221*pow(logT_ev, 7) - - 3.165581065665e-6*pow(logT_ev, 8)) / units; - } else { - k5 = tiny; - } - return k5; + Real k5; + if (T_ev > 0.8) { + k5 = exp(-68.71040990212001 + 43.93347632635 * logT_ev - 18.48066993568 * pow(logT_ev, 2) + + 4.701626486759002 * pow(logT_ev, 3) - 0.7692466334492 * pow(logT_ev, 4) + + 0.08113042097303 * pow(logT_ev, 5) - 0.005324020628287001 * pow(logT_ev, 6) + + 
0.0001975705312221 * pow(logT_ev, 7) - 3.165581065665e-6 * pow(logT_ev, 8)) / + units; + } else { + k5 = tiny; + } + return k5; } -//Calculation of k6 (HeIII + e --> HeII + photon) -// k6_rate -__device__ Real recomb_HeIII_rate( Real T, Real units, bool use_case_B ) +// Calculation of k6 (HeIII + e --> HeII + photon) +// k6_rate +__device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B) { - Real k6; - //Has case B recombination setting. - if (use_case_B) { - if (T < 1.0e9) { - k6 = 7.8155e-5*pow(T, -1.5) - * pow((1.0 + 2.0189e2*pow(T, -0.407)), -2.242) / units; - } else { - k6 = tiny; - } + Real k6; + // Has case B recombination setting. + if (use_case_B) { + if (T < 1.0e9) { + k6 = 7.8155e-5 * pow(T, -1.5) * pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; } else { - k6 = 3.36e-10/sqrt(T)/pow(T/1.0e3, 0.2) - / (1.0 + pow(T/1.0e6, 0.7)) / units; + k6 = tiny; } - return k6; + } else { + k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / units; + } + return k6; } // k6_rate Case A -__device__ Real recomb_HeIII_rate_case_A( Real T, Real units ) +__device__ Real recomb_HeIII_rate_case_A(Real T, Real units) { - Real k6; - //Has case B recombination setting. - k6 = 3.36e-10/sqrt(T)/pow(T/1.0e3, 0.2) - / (1.0 + pow(T/1.0e6, 0.7)) / units; - return k6; + Real k6; + // Has case B recombination setting. + k6 = 3.36e-10 / sqrt(T) / pow(T / 1.0e3, 0.2) / (1.0 + pow(T / 1.0e6, 0.7)) / units; + return k6; } // k6_rate Case B -__device__ Real recomb_HeIII_rate_case_B( Real T, Real units ) -{ - Real k6; - //Has case B recombination setting. - if (T < 1.0e9) { - k6 = 7.8155e-5*pow(T, -1.5) - * pow((1.0 + 2.0189e2*pow(T, -0.407)), -2.242) / units; - } else { - k6 = tiny; - } - return k6; +__device__ Real recomb_HeIII_rate_case_B(Real T, Real units) +{ + Real k6; + // Has case B recombination setting. 
+ if (T < 1.0e9) { + k6 = 7.8155e-5 * pow(T, -1.5) * pow((1.0 + 2.0189e2 * pow(T, -0.407)), -2.242) / units; + } else { + k6 = tiny; + } + return k6; } -//Calculation of k57 (HI + HI --> HII + HI + e) -// k57_rate -__device__ Real coll_i_HI_HI_rate( Real T, Real units ) +// Calculation of k57 (HI + HI --> HII + HI + e) +// k57_rate +__device__ Real coll_i_HI_HI_rate(Real T, Real units) { - // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). - // k57 value based on experimental cross-sections from Gealy & van Zyl (1987). - if (T > 3.0e3) { - return 1.2e-17 * pow(T, 1.2) * exp(-1.578e5 / T) / units; - } else { - return tiny; - } + // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). + // k57 value based on experimental cross-sections from Gealy & van Zyl (1987). + if (T > 3.0e3) { + return 1.2e-17 * pow(T, 1.2) * exp(-1.578e5 / T) / units; + } else { + return tiny; + } } -//Calculation of k58 (HI + HeI --> HII + HeI + e) -// k58_rate -__device__ Real coll_i_HI_HeI_rate( Real T, Real units ) +// Calculation of k58 (HI + HeI --> HII + HeI + e) +// k58_rate +__device__ Real coll_i_HI_HeI_rate(Real T, Real units) { - // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). - // k58 value based on cross-sections from van Zyl, Le & Amme (1981). - if (T > 3.0e3) { - return 1.75e-17 * pow(T, 1.3) * exp(-1.578e5 / T) / units; - } else { - return tiny; - } + // These rate coefficients are from Lenzuni, Chernoff & Salpeter (1991). + // k58 value based on cross-sections from van Zyl, Le & Amme (1981). + if (T > 3.0e3) { + return 1.75e-17 * pow(T, 1.3) * exp(-1.578e5 / T) / units; + } else { + return tiny; + } } -//Calculation of ceHI. -// Cooling collisional excitation HI -__host__ __device__ Real cool_ceHI_rate( Real T, Real units ) +// Calculation of ceHI. 
+// Cooling collisional excitation HI +__host__ __device__ Real cool_ceHI_rate(Real T, Real units) { - return 7.5e-19*exp( -fmin(log(dhuge), 118348.0 / T) ) - / ( 1.0 + sqrt(T / 1.0e5) ) / units; + return 7.5e-19 * exp(-fmin(log(dhuge), 118348.0 / T)) / (1.0 + sqrt(T / 1.0e5)) / units; } -//Calculation of ceHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ceHeI_rate( Real T, Real units ) +// Calculation of ceHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ceHeI_rate(Real T, Real units) { - return 9.1e-27*exp(-fmin(log(dhuge), 13179.0/T)) - * pow(T, -0.1687) / ( 1.0 + sqrt(T/1.0e5) ) / units; + return 9.1e-27 * exp(-fmin(log(dhuge), 13179.0 / T)) * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) / units; } -//Calculation of ceHeII. -// Cooling collisional excitation HeII -__host__ __device__ Real cool_ceHeII_rate( Real T, Real units ) +// Calculation of ceHeII. +// Cooling collisional excitation HeII +__host__ __device__ Real cool_ceHeII_rate(Real T, Real units) { - return 5.54e-17*exp(-fmin(log(dhuge), 473638.0/T)) - * pow(T, -0.3970) / ( 1.0 + sqrt(T/1.0e5) ) / units; + return 5.54e-17 * exp(-fmin(log(dhuge), 473638.0 / T)) * pow(T, -0.3970) / (1.0 + sqrt(T / 1.0e5)) / units; } -//Calculation of ciHeIS. -// Cooling collisional ionization HeIS -__host__ __device__ Real cool_ciHeIS_rate( Real T, Real units ) +// Calculation of ciHeIS. +// Cooling collisional ionization HeIS +__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units) { - return 5.01e-27*pow(T, -0.1687) / ( 1.0 + sqrt(T/1.0e5) ) - * exp(-fmin(log(dhuge), 55338.0/T)) / units; + return 5.01e-27 * pow(T, -0.1687) / (1.0 + sqrt(T / 1.0e5)) * exp(-fmin(log(dhuge), 55338.0 / T)) / units; } -//Calculation of ciHI. -// Cooling collisional ionization HI -__host__ __device__ Real cool_ciHI_rate( Real T, Real units ) +// Calculation of ciHI. 
+// Cooling collisional ionization HI +__host__ __device__ Real cool_ciHI_rate(Real T, Real units) { - //Collisional ionization. Polynomial fit from Tom Abel. - return 2.18e-11 * coll_i_HI_rate(T, 1) / units; + // Collisional ionization. Polynomial fit from Tom Abel. + return 2.18e-11 * coll_i_HI_rate(T, 1) / units; } - -//Calculation of ciHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ciHeI_rate( Real T, Real units ) +// Calculation of ciHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ciHeI_rate(Real T, Real units) { - //Collisional ionization. Polynomial fit from Tom Abel. - return 3.94e-11 * coll_i_HeI_rate(T, 1) / units; + // Collisional ionization. Polynomial fit from Tom Abel. + return 3.94e-11 * coll_i_HeI_rate(T, 1) / units; } -//Calculation of ciHeII. -// Cooling collisional ionization HeII -__host__ __device__ Real cool_ciHeII_rate( Real T, Real units ) +// Calculation of ciHeII. +// Cooling collisional ionization HeII +__host__ __device__ Real cool_ciHeII_rate(Real T, Real units) { - //Collisional ionization. Polynomial fit from Tom Abel. - return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; + // Collisional ionization. Polynomial fit from Tom Abel. + return 8.72e-11 * coll_i_HeII_rate(T, 1) / units; } -//Calculation of reHII. -// Cooling recombination HII -__host__ __device__ Real cool_reHII_rate( Real T, Real units, bool use_case_B ) +// Calculation of reHII. 
+// Cooling recombination HII +__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B) { - Real lambdaHI = 2.0 * 157807.0 / T; - if (use_case_B) { - return 3.435e-30 * T * pow(lambdaHI, 1.970) - / pow( 1.0 + pow(lambdaHI/2.25, 0.376), 3.720) - / units; - } else { - return 1.778e-29 * T * pow(lambdaHI, 1.965) - / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697) - / units; - } + Real lambdaHI = 2.0 * 157807.0 / T; + if (use_case_B) { + return 3.435e-30 * T * pow(lambdaHI, 1.970) / pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; + } else { + return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; + } } -//Calculation of reHII. -// Cooling recombination HII Case A -__host__ __device__ Real cool_reHII_rate_case_A( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HII Case A +__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units) { - Real lambdaHI = 2.0 * 157807.0 / T; - return 1.778e-29 * T * pow(lambdaHI, 1.965) - / pow(1.0 + pow(lambdaHI/0.541, 0.502), 2.697) - / units; + Real lambdaHI = 2.0 * 157807.0 / T; + return 1.778e-29 * T * pow(lambdaHI, 1.965) / pow(1.0 + pow(lambdaHI / 0.541, 0.502), 2.697) / units; } -//Calculation of reHII. -// Cooling recombination HII Case B -__host__ __device__ Real cool_reHII_rate_case_B( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HII Case B +__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units) { - Real lambdaHI = 2.0 * 157807.0 / T; - return 3.435e-30 * T * pow(lambdaHI, 1.970) - / pow( 1.0 + pow(lambdaHI/2.25, 0.376), 3.720) - / units; + Real lambdaHI = 2.0 * 157807.0 / T; + return 3.435e-30 * T * pow(lambdaHI, 1.970) / pow(1.0 + pow(lambdaHI / 2.25, 0.376), 3.720) / units; } -//Calculation of reHII. -// Cooling recombination HeII -__host__ __device__ Real cool_reHeII1_rate( Real T, Real units, bool use_case_B ) +// Calculation of reHII. 
+// Cooling recombination HeII +__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B) { - Real lambdaHeII = 2.0 * 285335.0 / T; - if ( use_case_B ) { - return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) - / units; - } else { - return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) - / units; - } + Real lambdaHeII = 2.0 * 285335.0 / T; + if (use_case_B) { + return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) / units; + } else { + return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units; + } } -//Calculation of reHII. -// Cooling recombination HeII Case A -__host__ __device__ Real cool_reHeII1_rate_case_A( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HeII Case A +__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units) { - Real lambdaHeII = 2.0 * 285335.0 / T; - return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) - / units; + Real lambdaHeII = 2.0 * 285335.0 / T; + return 3e-14 * kboltz * T * pow(lambdaHeII, 0.654) / units; } -//Calculation of reHII. -// Cooling recombination HeII Case B -__host__ __device__ Real cool_reHeII1_rate_case_B( Real T, Real units ) +// Calculation of reHII. +// Cooling recombination HeII Case B +__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units) { - Real lambdaHeII = 2.0 * 285335.0 / T; - return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) - / units; + Real lambdaHeII = 2.0 * 285335.0 / T; + return 1.26e-14 * kboltz * T * pow(lambdaHeII, 0.75) / units; } -//Calculation of reHII2. -// Cooling recombination HeII Dielectronic -__host__ __device__ Real cool_reHeII2_rate( Real T, Real units ) +// Calculation of reHII2. +// Cooling recombination HeII Dielectronic +__host__ __device__ Real cool_reHeII2_rate(Real T, Real units) { - //Dielectronic recombination (Cen, 1992). - return 1.24e-13 * pow(T, -1.5) - * exp( -fmin(log(dhuge), 470000.0 / T) ) - * ( 1.0 + 0.3 * exp( -fmin(log(dhuge), 94000.0 / T) ) ) - / units; + // Dielectronic recombination (Cen, 1992). 
+ return 1.24e-13 * pow(T, -1.5) * exp(-fmin(log(dhuge), 470000.0 / T)) * + (1.0 + 0.3 * exp(-fmin(log(dhuge), 94000.0 / T))) / units; } -//Calculation of reHIII. -// Cooling recombination HeIII -__host__ __device__ Real cool_reHeIII_rate( Real T, Real units, bool use_case_B ) +// Calculation of reHIII. +// Cooling recombination HeIII +__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B) { - Real lambdaHeIII = 2.0 * 631515.0 / T; - if ( use_case_B ) { - return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) - / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) - / units; - } else { - return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) - / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) - / units; - } + Real lambdaHeIII = 2.0 * 631515.0 / T; + if (use_case_B) { + return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; + } else { + return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; + } } -//Calculation of reHIII. -// Cooling recombination HeIII Case A -__host__ __device__ Real cool_reHeIII_rate_case_A( Real T, Real units ) +// Calculation of reHIII. +// Cooling recombination HeIII Case A +__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units) { - Real lambdaHeIII = 2.0 * 631515.0 / T; - return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) - / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) - / units; + Real lambdaHeIII = 2.0 * 631515.0 / T; + return 8.0 * 1.778e-29 * T * pow(lambdaHeIII, 1.965) / pow(1.0 + pow(lambdaHeIII / 0.541, 0.502), 2.697) / units; } -//Calculation of reHIII. -// Cooling recombination HeIII Case B -__host__ __device__ Real cool_reHeIII_rate_case_B( Real T, Real units ) +// Calculation of reHIII. 
+// Cooling recombination HeIII Case B +__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units) { - Real lambdaHeIII = 2.0 * 631515.0 / T; - return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) - / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) - / units; + Real lambdaHeIII = 2.0 * 631515.0 / T; + return 8.0 * 3.435e-30 * T * pow(lambdaHeIII, 1.970) / pow(1.0 + pow(lambdaHeIII / 2.25, 0.376), 3.720) / units; } -//Calculation of brem. -// Cooling Bremsstrahlung -__host__ __device__ Real cool_brem_rate( Real T, Real units ) +// Calculation of brem. +// Cooling Bremsstrahlung +__host__ __device__ Real cool_brem_rate(Real T, Real units) { - return 1.43e-27 * sqrt(T) - * ( 1.1 + 0.34 * exp( -pow(5.5 - log10(T), 2) / 3.0) ) - / units; + return 1.43e-27 * sqrt(T) * (1.1 + 0.34 * exp(-pow(5.5 - log10(T), 2) / 3.0)) / units; } - - - - -#endif \ No newline at end of file +#endif diff --git a/src/chemistry_gpu/chemistry_gpu.h b/src/chemistry_gpu/chemistry_gpu.h index 751059f07..79674c3a0 100644 --- a/src/chemistry_gpu/chemistry_gpu.h +++ b/src/chemistry_gpu/chemistry_gpu.h @@ -1,26 +1,23 @@ #ifndef CHEMISTRY_GPU_H #define CHEMISTRY_GPU_H -#include"../global/global.h" +#include "../global/global.h" #define CHEM_TINY 1e-20 -//Define the type of a generic rate function. -typedef Real (*Rate_Function_T)( Real, Real ); - +// Define the type of a generic rate function. 
+typedef Real (*Rate_Function_T)(Real, Real); // #define TEXTURES_UVB_INTERPOLATION -struct Chemistry_Header -{ +struct ChemistryHeader { Real gamma; Real density_conversion; Real energy_conversion; Real current_z; Real runtime_chemistry_step; Real H_fraction; - - + // Units system Real a_value; Real density_units; @@ -30,45 +27,45 @@ struct Chemistry_Header Real cooling_units; Real reaction_units; Real dens_number_conv; - + // Cosmological parameters Real H0; Real Omega_M; Real Omega_L; - + // Interpolation tables for the rates - int N_Temp_bins; + int N_Temp_bins; Real Temp_start; Real Temp_end; - + Real *cool_ceHI_d; Real *cool_ceHeI_d; Real *cool_ceHeII_d; - + Real *cool_ciHI_d; Real *cool_ciHeI_d; Real *cool_ciHeII_d; Real *cool_ciHeIS_d; - + Real *cool_reHII_d; - Real *cool_reHeII1_d; - Real *cool_reHeII2_d; + Real *cool_reHeII_1_d; + Real *cool_reHeII_2_d; Real *cool_reHeIII_d; - + Real *cool_brem_d; - + Real cool_compton; - + Real *k_coll_i_HI_d; Real *k_coll_i_HeI_d; Real *k_coll_i_HeII_d; Real *k_coll_i_HI_HI_d; Real *k_coll_i_HI_HeI_d; - + Real *k_recomb_HII_d; Real *k_recomb_HeII_d; Real *k_recomb_HeIII_d; - + int max_iter; int n_uvb_rates_samples; @@ -79,32 +76,26 @@ struct Chemistry_Header float *photo_heat_HI_rate_d; float *photo_heat_HeI_rate_d; float *photo_heat_HeII_rate_d; - }; - - - #ifdef CHEMISTRY_GPU class Chem_GPU { -public: - + public: int nx; int ny; int nz; - - + bool use_case_B_recombination; - + Real scale_factor_UVB_on; float *cosmo_params_h; float *cosmo_params_d; - + int n_uvb_rates_samples; - float *rates_z_h; + float *rates_z_h; float *Heat_rates_HI_h; float *Heat_rates_HeI_h; float *Heat_rates_HeII_h; @@ -119,52 +110,49 @@ class Chem_GPU float *Ion_rates_HI_d; float *Ion_rates_HeI_d; float *Ion_rates_HeII_d; - + struct Chemistry_Header H; - - - struct Fields - { + + struct Fields { Real *temperature_h; } Fields; - - - void Allocate_Array_GPU_Real( Real **array_dev, int size ); - void Copy_Real_Array_to_Device( int size, Real 
*array_h, Real *array_d ); - void Free_Array_GPU_Real( Real *array_dev ); - void Allocate_Array_GPU_float( float **array_dev, int size ); - void Copy_Float_Array_to_Device( int size, float *array_h, float *array_d ); - void Free_Array_GPU_float( float *array_dev ); - - void Initialize( struct parameters *P ); - - void Generate_Reaction_Rate_Table( Real **rate_table_array_d, Rate_Function_T rate_function, Real units ); - + + void Allocate_Array_GPU_Real(Real **array_dev, int size); + void Copy_Real_Array_to_Device(int size, Real *array_h, Real *array_d); + void Free_Array_GPU_Real(Real *array_dev); + void Allocate_Array_GPU_float(float **array_dev, int size); + void Copy_Float_Array_to_Device(int size, float *array_h, float *array_d); + void Free_Array_GPU_float(float *array_dev); + + void Initialize(struct Parameters *P); + + void Generate_Reaction_Rate_Table(Real **rate_table_array_d, Rate_Function_T rate_function, Real units); + void Initialize_Cooling_Rates(); - - void Initialize_Reaction_Rates(); - - void Initialize_UVB_Ionization_and_Heating_Rates( struct parameters *P ); - - void Load_UVB_Ionization_and_Heating_Rates( struct parameters *P ); - - void Copy_UVB_Rates_to_GPU(); - - void Reset( ); - - #ifdef TEXTURES_UVB_INTERPOLATION - void Bind_GPU_Textures( int size, float *H_HI_h, float *H_HeI_h, float *H_HeII_h , float *I_HI_h, float *I_HeI_h, float *I_HeII_h ); - #endif -}; + void Initialize_Reaction_Rates(); + + void Initialize_UVB_Ionization_and_Heating_Rates(struct Parameters *P); + + void Load_UVB_Ionization_and_Heating_Rates(struct Parameters *P); + void Copy_UVB_Rates_to_GPU(); -/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) -* \brief When passed an array of conserved variables and a timestep, update the ionization fractions of H and He and update -the internal energy to account for radiative cooling and photoheating from the UV background. 
*/ -void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Chemistry_Header &Chem_H); + void Reset(); + #ifdef TEXTURES_UVB_INTERPOLATION + void Bind_GPU_Textures(int size, float *H_HI_h, float *H_HeI_h, float *H_HeII_h, float *I_HI_h, float *I_HeI_h, + float *I_HeII_h); + #endif +}; +/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int +n_ghost, int n_fields, Real dt, Real gamma) +* \brief When passed an array of conserved variables and a timestep, update the +ionization fractions of H and He and update the internal energy to account for +radiative cooling and photoheating from the UV background. */ +void Do_Chemistry_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Chemistry_Header &Chem_H); #endif #endif \ No newline at end of file diff --git a/src/chemistry_gpu/chemistry_io.cpp b/src/chemistry_gpu/chemistry_io.cpp index 20cb53d7b..20d23318e 100644 --- a/src/chemistry_gpu/chemistry_io.cpp +++ b/src/chemistry_gpu/chemistry_io.cpp @@ -1,110 +1,92 @@ #ifdef CHEMISTRY_GPU -#include -#include -#include -#include -#include -#include "chemistry_gpu.h" -#include "../io/io.h" - - -using namespace std; - - -void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates( struct parameters *P ){ - - + #include // provides std::strcpy (strcpy in this file) + #include + #include + #include + #include + #include + + #include "../io/io.h" + #include "chemistry_gpu.h" + +void Chem_GPU::Load_UVB_Ionization_and_Heating_Rates(struct Parameters *P) +{ char uvb_filename[100]; // create the filename to read from strcpy(uvb_filename, P->UVB_rates_file); - chprintf( " Loading UVB rates: %s\n", uvb_filename); - + chprintf(" Loading UVB rates: %s\n", uvb_filename); - std::fstream in(uvb_filename); std::string line; std::vector> v; int i = 0; - if (in.is_open()){ - while (std::getline(in, line)) - { - if ( line.find("#") == 0 ) continue; - - float value; - std::stringstream ss(line); - 
// chprintf( "%s \n", line.c_str() ); - v.push_back(std::vector()); - - while (ss >> value){ - v[i].push_back(value); - } - i += 1; + if (in.is_open()) { + while (std::getline(in, line)) { + if (line.find("#") == 0) continue; + + float value; + std::stringstream ss(line); + // chprintf( "%s \n", line.c_str() ); + v.push_back(std::vector()); + + while (ss >> value) { + v[i].push_back(value); + } + i += 1; } in.close(); - } else{ + } else { chprintf(" Error: Unable to open UVB rates file: %s\n", uvb_filename); exit(1); } - + int n_lines = i; - - chprintf( " Loaded %d lines in file\n", n_lines); - - rates_z_h = (float *)malloc(sizeof(float)*n_lines); - Heat_rates_HI_h = (float *)malloc(sizeof(float)*n_lines); - Heat_rates_HeI_h = (float *)malloc(sizeof(float)*n_lines); - Heat_rates_HeII_h = (float *)malloc(sizeof(float)*n_lines); - Ion_rates_HI_h = (float *)malloc(sizeof(float)*n_lines); - Ion_rates_HeI_h = (float *)malloc(sizeof(float)*n_lines); - Ion_rates_HeII_h = (float *)malloc(sizeof(float)*n_lines); - - Real eV_to_ergs, heat_units, ion_units; + + chprintf(" Loaded %d lines in file\n", n_lines); + + rates_z_h = (float *)malloc(sizeof(float) * n_lines); + Heat_rates_HI_h = (float *)malloc(sizeof(float) * n_lines); + Heat_rates_HeI_h = (float *)malloc(sizeof(float) * n_lines); + Heat_rates_HeII_h = (float *)malloc(sizeof(float) * n_lines); + Ion_rates_HI_h = (float *)malloc(sizeof(float) * n_lines); + Ion_rates_HeI_h = (float *)malloc(sizeof(float) * n_lines); + Ion_rates_HeII_h = (float *)malloc(sizeof(float) * n_lines); + + Real eV_to_ergs, heat_units, ion_units; eV_to_ergs = 1.60218e-12; heat_units = eV_to_ergs / H.cooling_units; ion_units = H.time_units; - - for (i=0; i rates_z_h[i+1] ){ - chprintf( " ERROR: UVB rates must be ordered such that redshift is increasing as the rows increase in the file\n", uvb_filename); + + for (i = 0; i < n_lines - 1; i++) { + if (rates_z_h[i] > rates_z_h[i + 1]) { + chprintf( + " ERROR: UVB rates must be ordered such that 
redshift is increasing " + "as the rows increase in the file\n", + uvb_filename); exit(2); } } - + n_uvb_rates_samples = n_lines; - scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples-1] + 1 ); + scale_factor_UVB_on = 1 / (rates_z_h[n_uvb_rates_samples - 1] + 1); chprintf(" Loaded UVB rates: \n"); - chprintf(" N redshift values: %d \n", n_uvb_rates_samples ); - chprintf(" z_min = %f z_max = %f \n", rates_z_h[0], rates_z_h[n_uvb_rates_samples-1] ); - chprintf(" UVB on: a=%f \n", scale_factor_UVB_on ); - - + chprintf(" N redshift values: %d \n", n_uvb_rates_samples); + chprintf(" z_min = %f z_max = %f \n", rates_z_h[0], rates_z_h[n_uvb_rates_samples - 1]); + chprintf(" UVB on: a=%f \n", scale_factor_UVB_on); } - - - - - - - - - - - - - - -#endif \ No newline at end of file +#endif diff --git a/src/chemistry_gpu/rates.cuh b/src/chemistry_gpu/rates.cuh index 5a9455824..bf09aabc9 100644 --- a/src/chemistry_gpu/rates.cuh +++ b/src/chemistry_gpu/rates.cuh @@ -1,170 +1,162 @@ #ifdef CHEMISTRY_GPU -#include "chemistry_gpu.h" -#include"../global/global_cuda.h" - - + #include "../global/global_cuda.h" + #include "chemistry_gpu.h" // Calculation of k1 (HI + e --> HII + 2e) // k1_rate -__host__ __device__ Real coll_i_HI_rate(Real T, Real units ); +__host__ __device__ Real coll_i_HI_rate(Real T, Real units); -//Calculation of k3 (HeI + e --> HeII + 2e) -// k3_rate -__host__ __device__ Real coll_i_HeI_rate(Real T, Real units ); +// Calculation of k3 (HeI + e --> HeII + 2e) +// k3_rate +__host__ __device__ Real coll_i_HeI_rate(Real T, Real units); -//Calculation of k4 (HeII + e --> HeI + photon) -// k4_rate -__host__ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B ); +// Calculation of k4 (HeII + e --> HeI + photon) +// k4_rate +__host__ __device__ Real recomb_HeII_rate(Real T, Real units, bool use_case_B); // k4_rate Case A -__host__ __device__ Real recomb_HeII_rate_case_A(Real T, Real units ); +__host__ __device__ Real recomb_HeII_rate_case_A(Real T, 
Real units); // k4_rate Case B -__host__ __device__ Real recomb_HeII_rate_case_B(Real T, Real units ); +__host__ __device__ Real recomb_HeII_rate_case_B(Real T, Real units); -//Calculation of k2 (HII + e --> HI + photon) -// k2_rate -__host__ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B ); +// Calculation of k2 (HII + e --> HI + photon) +// k2_rate +__host__ __device__ Real recomb_HII_rate(Real T, Real units, bool use_case_B); // k2_rate Case A -__host__ __device__ Real recomb_HII_rate_case_A(Real T, Real units ); +__host__ __device__ Real recomb_HII_rate_case_A(Real T, Real units); // k2_rate Case B -__host__ __device__ Real recomb_HII_rate_case_B(Real T, Real units ); +__host__ __device__ Real recomb_HII_rate_case_B(Real T, Real units); -//Calculation of k5 (HeII + e --> HeIII + 2e) -// k5_rate -__host__ __device__ Real coll_i_HeII_rate(Real T, Real units ); +// Calculation of k5 (HeII + e --> HeIII + 2e) +// k5_rate +__host__ __device__ Real coll_i_HeII_rate(Real T, Real units); -//Calculation of k6 (HeIII + e --> HeII + photon) -// k6_rate -__host__ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B ); +// Calculation of k6 (HeIII + e --> HeII + photon) +// k6_rate +__host__ __device__ Real recomb_HeIII_rate(Real T, Real units, bool use_case_B); // k6_rate Case A -__host__ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units ); +__host__ __device__ Real recomb_HeIII_rate_case_A(Real T, Real units); // k6_rate Case B -__host__ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units ); - -//Calculation of k57 (HI + HI --> HII + HI + e) -// k57_rate -__host__ __device__ Real coll_i_HI_HI_rate(Real T, Real units ); +__host__ __device__ Real recomb_HeIII_rate_case_B(Real T, Real units); -//Calculation of k58 (HI + HeI --> HII + HeI + e) -// k58_rate -__host__ __device__ Real coll_i_HI_HeI_rate(Real T, Real units ); +// Calculation of k57 (HI + HI --> HII + HI + e) +// k57_rate +__host__ __device__ Real 
coll_i_HI_HI_rate(Real T, Real units); -//Calculation of ceHI. -// Cooling collisional excitation HI -__host__ __device__ Real cool_ceHI_rate(Real T, Real units ); +// Calculation of k58 (HI + HeI --> HII + HeI + e) +// k58_rate +__host__ __device__ Real coll_i_HI_HeI_rate(Real T, Real units); -//Calculation of ceHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ceHeI_rate(Real T, Real units ); +// Calculation of ceHI. +// Cooling collisional excitation HI +__host__ __device__ Real cool_ceHI_rate(Real T, Real units); -//Calculation of ceHeII. -// Cooling collisional excitation HeII -__host__ __device__ Real cool_ceHeII_rate(Real T, Real units ); +// Calculation of ceHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ceHeI_rate(Real T, Real units); -//Calculation of ciHeIS. -// Cooling collisional ionization HeIS -__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units ); +// Calculation of ceHeII. +// Cooling collisional excitation HeII +__host__ __device__ Real cool_ceHeII_rate(Real T, Real units); -//Calculation of ciHI. -// Cooling collisional ionization HI -__host__ __device__ Real cool_ciHI_rate(Real T, Real units ); +// Calculation of ciHeIS. +// Cooling collisional ionization HeIS +__host__ __device__ Real cool_ciHeIS_rate(Real T, Real units); +// Calculation of ciHI. +// Cooling collisional ionization HI +__host__ __device__ Real cool_ciHI_rate(Real T, Real units); -//Calculation of ciHeI. -// Cooling collisional ionization HeI -__host__ __device__ Real cool_ciHeI_rate(Real T, Real units ); +// Calculation of ciHeI. +// Cooling collisional ionization HeI +__host__ __device__ Real cool_ciHeI_rate(Real T, Real units); -//Calculation of ciHeII. -// Cooling collisional ionization HeII -__host__ __device__ Real cool_ciHeII_rate(Real T, Real units ); +// Calculation of ciHeII. +// Cooling collisional ionization HeII +__host__ __device__ Real cool_ciHeII_rate(Real T, Real units); - -//Calculation of reHII. 
-// Cooling recombination HII -__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B ); +// Calculation of reHII. +// Cooling recombination HII +__host__ __device__ Real cool_reHII_rate(Real T, Real units, bool use_case_B); // Cooling recombination HII Case A -__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units ); +__host__ __device__ Real cool_reHII_rate_case_A(Real T, Real units); // Cooling recombination HII Case B -__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units ); +__host__ __device__ Real cool_reHII_rate_case_B(Real T, Real units); -//Calculation of reHII. -// Cooling recombination HeII -__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B ); +// Calculation of reHII. +// Cooling recombination HeII +__host__ __device__ Real cool_reHeII1_rate(Real T, Real units, bool use_case_B); // Cooling recombination HeII Case A -__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units ); +__host__ __device__ Real cool_reHeII1_rate_case_A(Real T, Real units); // Cooling recombination HeII Case B -__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units ); +__host__ __device__ Real cool_reHeII1_rate_case_B(Real T, Real units); -//Calculation of reHII2. -// Cooling recombination HeII Dielectronic -__host__ __device__ Real cool_reHeII2_rate(Real T, Real units ); +// Calculation of reHII2. +// Cooling recombination HeII Dielectronic +__host__ __device__ Real cool_reHeII2_rate(Real T, Real units); -//Calculation of reHIII. -// Cooling recombination HeIII -__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B ); +// Calculation of reHIII. 
+// Cooling recombination HeIII +__host__ __device__ Real cool_reHeIII_rate(Real T, Real units, bool use_case_B); // Cooling recombination HeIII Case A -__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units ); +__host__ __device__ Real cool_reHeIII_rate_case_A(Real T, Real units); // Cooling recombination HeIII Case B -__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units ); +__host__ __device__ Real cool_reHeIII_rate_case_B(Real T, Real units); -//Calculation of brem. -// Cooling Bremsstrahlung -__host__ __device__ Real cool_brem_rate(Real T, Real units ); +// Calculation of brem. +// Cooling Bremsstrahlung +__host__ __device__ Real cool_brem_rate(Real T, Real units); -//Calculation of comp. -// Compton cooling +// Calculation of comp. +// Compton cooling __host__ __device__ Real comp_rate(Real n_e, Real T, Real zr, Real units); -__host__ __device__ Real cool_compton_rate( Real T, Real units ); - +__host__ __device__ Real cool_compton_rate(Real T, Real units); // X-ray compton heating -__host__ __device__ Real xray_heat_rate( Real n_e, Real T, Real Redshift, Real units ); - - -// Colisional excitation of neutral hydrogen (HI) and singly ionized helium (HeII) -Real __device__ Collisional_Ionization_Rate_e_HI_Abel97( Real temp ); - -Real __device__ Recombination_Rate_HII_Abel97( Real temp ); - -Real __device__ Collisional_Ionization_Rate_e_HeI_Abel97( Real temp ); - -Real __device__ Collisional_Ionization_Rate_e_HeII_Abel97( Real temp ); +__host__ __device__ Real xray_heat_rate(Real n_e, Real T, Real Redshift, Real units); -Real __device__ Collisional_Ionization_Rate_HI_HI_Lenzuni91( Real temp ); +// Colisional excitation of neutral hydrogen (HI) and singly ionized helium +// (HeII) +Real __device__ Collisional_Ionization_Rate_e_HI_Abel97(Real temp); -Real __device__ Collisional_Ionization_Rate_HII_HI_Lenzuni91( Real temp ); +Real __device__ Recombination_Rate_HII_Abel97(Real temp); -Real __device__ 
Collisional_Ionization_Rate_HeI_HI_Lenzuni91( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeI_Abel97(Real temp); -Real __device__ Recombination_Rate_HII_Hui97( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeII_Abel97(Real temp); -Real __device__ Recombination_Rate_HeII_Hui97( Real temp ); +Real __device__ Collisional_Ionization_Rate_HI_HI_Lenzuni91(Real temp); -Real __device__ Recombination_Rate_HeIII_Hui97( Real temp ); +Real __device__ Collisional_Ionization_Rate_HII_HI_Lenzuni91(Real temp); +Real __device__ Collisional_Ionization_Rate_HeI_HI_Lenzuni91(Real temp); -Real __device__ Cooling_Rate_Recombination_HII_Hui97( Real n_e, Real n_HII, Real temp ); +Real __device__ Recombination_Rate_HII_Hui97(Real temp); -Real __device__ Cooling_Rate_Recombination_HeII_Hui97( Real n_e, Real n_HII, Real temp ); +Real __device__ Recombination_Rate_HeII_Hui97(Real temp); -Real __device__ Cooling_Rate_Recombination_HeIII_Hui97( Real n_e, Real n_HII, Real temp ); +Real __device__ Recombination_Rate_HeIII_Hui97(Real temp); -Real __device__ Recombination_Rate_dielectronic_HeII_Hui97( Real temp ); +Real __device__ Cooling_Rate_Recombination_HII_Hui97(Real n_e, Real n_HII, Real temp); -Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97( Real n_e, Real n_HeII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HeII_Hui97(Real n_e, Real n_HII, Real temp); -Real __device__ Collisional_Ionization_Rate_e_HI_Hui97( Real temp ); +Real __device__ Cooling_Rate_Recombination_HeIII_Hui97(Real n_e, Real n_HII, Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97( Real n_e, Real n_HI, Real temp ); +Real __device__ Recombination_Rate_dielectronic_HeII_Hui97(Real temp); -Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97( Real n_e, Real n_HeII, Real temp ); +Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Hui97(Real n_e, Real n_HeII, Real temp); -// Compton cooling off the CMB -Real __device__ 
Cooling_Rate_Compton_CMB_MillesOstriker01( Real n_e, Real temp, Real z ); +Real __device__ Collisional_Ionization_Rate_e_HI_Hui97(Real temp); -// Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real current_z, cosmo ); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Hui97(Real n_e, Real n_HI, Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Hui97(Real n_e, Real n_HeII, Real temp); +// Compton cooling off the CMB +Real __device__ Cooling_Rate_Compton_CMB_MillesOstriker01(Real n_e, Real temp, Real z); +// Real __device__ Cooling_Rate_Compton_CMB_Peebles93( Real n_e, Real temp, Real +// current_z, cosmo ); #endif \ No newline at end of file diff --git a/src/chemistry_gpu/rates_Katz95.cuh b/src/chemistry_gpu/rates_Katz95.cuh index 4942f1558..18c5e54c2 100644 --- a/src/chemistry_gpu/rates_Katz95.cuh +++ b/src/chemistry_gpu/rates_Katz95.cuh @@ -1,58 +1,53 @@ #ifdef CHEMISTRY_GPU -#include "chemistry_gpu.h" -#include"../global/global_cuda.h" + #include "../global/global_cuda.h" + #include "chemistry_gpu.h" +// Colisional excitation of neutral hydrogen (HI) and singly ionized helium +// (HeII) -// Colisional excitation of neutral hydrogen (HI) and singly ionized helium (HeII) - -Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95( Real n_e, Real n_HI, Real temp ); - -Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); - +Real __device__ Cooling_Rate_Collisional_Excitation_e_HI_Katz95(Real n_e, Real n_HI, Real temp); +Real __device__ Cooling_Rate_Collisional_Excitation_e_HeII_Katz95(Real n_e, Real n_HeII, Real temp); // Colisional ionization of HI, HeI and HeII -Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95( Real n_e, Real n_HI, Real temp ); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HI_Katz95(Real n_e, Real n_HI, Real temp); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95(Real n_e, Real n_HeI, Real temp); 
-Real __device__ Cooling_Rate_Collisional_Ionization_e_HeI_Katz95( Real n_e, Real n_HeI, Real temp ); +Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95(Real n_e, Real n_HeII, Real temp); -Real __device__ Cooling_Rate_Collisional_Ionization_e_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HI_Katz95(Real temp); -Real __device__ Collisional_Ionization_Rate_e_HI_Katz95( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeI_Katz95(Real temp); -Real __device__ Collisional_Ionization_Rate_e_HeI_Katz95( Real temp ); - -Real __device__ Collisional_Ionization_Rate_e_HeII_Katz95( Real temp ); +Real __device__ Collisional_Ionization_Rate_e_HeII_Katz95(Real temp); // Standard Recombination of HII, HeII and HeIII -Real __device__ Cooling_Rate_Recombination_HII_Katz95( Real n_e, Real n_HII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HII_Katz95(Real n_e, Real n_HII, Real temp); -Real __device__ Cooling_Rate_Recombination_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HeII_Katz95(Real n_e, Real n_HeII, Real temp); -Real __device__ Cooling_Rate_Recombination_HeIII_Katz95( Real n_e, Real n_HeIII, Real temp ); +Real __device__ Cooling_Rate_Recombination_HeIII_Katz95(Real n_e, Real n_HeIII, Real temp); -Real __device__ Recombination_Rate_HII_Katz95( Real temp ); +Real __device__ Recombination_Rate_HII_Katz95(Real temp); -Real __device__ Recombination_Rate_HeII_Katz95( Real temp ); +Real __device__ Recombination_Rate_HeII_Katz95(Real temp); -Real __device__ Recombination_Rate_HeIII_Katz95( Real temp ); +Real __device__ Recombination_Rate_HeIII_Katz95(Real temp); // Dielectronic recombination of HeII -Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95( Real n_e, Real n_HeII, Real temp ); - -Real __device__ Recombination_Rate_dielectronic_HeII_Katz95( Real temp ); - -// Free-Free emission (Bremsstrahlung) -Real __device__ gaunt_factor( 
Real log10_T ); +Real __device__ Cooling_Rate_Recombination_dielectronic_HeII_Katz95(Real n_e, Real n_HeII, Real temp); -Real __device__ Cooling_Rate_Bremsstrahlung_Katz95( Real n_e, Real n_HII, Real n_HeII, Real n_HeIII, Real temp ); +Real __device__ Recombination_Rate_dielectronic_HeII_Katz95(Real temp); +// Free-Free emission (Bremsstrahlung) +Real __device__ gaunt_factor(Real log10_T); -// Compton cooling off the CMB -Real __device__ Cooling_Rate_Compton_CMB_Katz95( Real n_e, Real temp, Real z ); +Real __device__ Cooling_Rate_Bremsstrahlung_Katz95(Real n_e, Real n_HII, Real n_HeII, Real n_HeIII, Real temp); +// Compton cooling off the CMB +Real __device__ Cooling_Rate_Compton_CMB_Katz95(Real n_e, Real temp, Real z); #endif \ No newline at end of file diff --git a/src/cooling/cooling_cuda.cu b/src/cooling/cooling_cuda.cu index f1d0a8ee5..5cbebbb72 100644 --- a/src/cooling/cooling_cuda.cu +++ b/src/cooling/cooling_cuda.cu @@ -1,199 +1,195 @@ /*! \file cooling_cuda.cu * \brief Functions to calculate cooling rate for a given rho, P, dt. 
*/ -#ifdef CUDA #ifdef COOLING_GPU -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../cooling/cooling_cuda.h" + #include -#ifdef CLOUDY_COOL -#include "../cooling/texture_utilities.h" -#endif + #include "../cooling/cooling_cuda.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" + + #ifdef CLOUDY_COOL + #include "../cooling/texture_utilities.h" + #endif cudaTextureObject_t coolTexObj = 0; cudaTextureObject_t heatTexObj = 0; -void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma){ - - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; +void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) +{ + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gama, coolTexObj, heatTexObj); - CudaCheckError(); + hipLaunchKernelGGL(cooling_kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, + gama, coolTexObj, heatTexObj); + GPU_Error_Check(); } - -/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) - * \brief When passed an array of conserved variables and a timestep, adjust the value - of the total energy for each cell according to the specified cooling function. */ -__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +/*! 
\fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int + n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj) + * \brief When passed an array of conserved variables and a timestep, adjust + the value of the total energy for each cell according to the specified cooling + function. */ +__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { - - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int is, ie, js, je, ks, ke; is = n_ghost; - ie = nx-n_ghost; + ie = nx - n_ghost; if (ny == 1) { js = 0; je = 1; - } - else { + } else { js = n_ghost; - je = ny-n_ghost; + je = ny - n_ghost; } if (nz == 1) { ks = 0; ke = 1; - } - else { + } else { ks = n_ghost; - ke = nz-n_ghost; + ke = nz - n_ghost; } Real d, E; Real n, T, T_init; Real del_T, dt_sub; - Real mu; // mean molecular weight - Real cool; //cooling rate per volume, erg/s/cm^3 - //#ifndef DE + Real mu; // mean molecular weight + Real cool; // cooling rate per volume, erg/s/cm^3 + // #ifndef DE Real vx, vy, vz, p; - //#endif + // #endif #ifdef DE Real ge; #endif mu = 0.6; - //mu = 1.27; + // mu = 1.27; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int id = threadIdx.x + blockId * blockDim.x; - int zid = id / (nx*ny); - int yid = (id - zid*nx*ny) / nx; - int xid = id - zid*nx*ny - yid*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; // only threads corresponding to real cells do the calculation if (xid >= is && xid < ie && yid >= js && yid < je && zid >= ks && zid < ke) { - // load values of density and pressure - d = dev_conserved[ id]; - E = dev_conserved[4*n_cells + id]; + d = dev_conserved[id]; + E = 
dev_conserved[4 * n_cells + id]; // don't apply cooling if this thread crashed - if (E < 0.0 || E != E) return; - //#ifndef DE - vx = dev_conserved[1*n_cells + id] / d; - vy = dev_conserved[2*n_cells + id] / d; - vz = dev_conserved[3*n_cells + id] / d; - p = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - p = fmax(p, (Real) TINY_NUMBER); - //#endif - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id] / d; - ge = fmax(ge, (Real) TINY_NUMBER); - #endif + if (E < 0.0 || E != E) { + return; + } + // #ifndef DE + vx = dev_conserved[1 * n_cells + id] / d; + vy = dev_conserved[2 * n_cells + id] / d; + vz = dev_conserved[3 * n_cells + id] / d; + p = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + p = fmax(p, (Real)TINY_NUMBER); + // #endif + #ifdef DE + ge = dev_conserved[(n_fields - 1) * n_cells + id] / d; + ge = fmax(ge, (Real)TINY_NUMBER); + #endif // calculate the number density of the gas (in cgs) - n = d*DENSITY_UNIT / (mu * MP); + n = d * DENSITY_UNIT / (mu * MP); // calculate the temperature of the gas - T_init = p*PRESSURE_UNIT/ (n*KB); - #ifdef DE - T_init = d*ge*(gamma-1.0)*PRESSURE_UNIT/(n*KB); - #endif + T_init = p * PRESSURE_UNIT / (n * KB); + #ifdef DE + T_init = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); + #endif // calculate cooling rate per volume T = T_init; - // call the cooling function - #ifdef CLOUDY_COOL + // call the cooling function + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - #endif + #endif // calculate change in temperature given dt - del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB); + del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); // limit change in temperature to 1% - while (del_T/T > 0.01) { + while (del_T / T > 0.01) { // what dt gives del_T = 0.01*T? 
- dt_sub = 0.01*T*n*KB/(cool*TIME_UNIT*(gamma-1.0)); + dt_sub = 0.01 * T * n * KB / (cool * TIME_UNIT * (gamma - 1.0)); // apply that dt - T -= cool*dt_sub*TIME_UNIT*(gamma-1.0)/(n*KB); + T -= cool * dt_sub * TIME_UNIT * (gamma - 1.0) / (n * KB); // how much time is left from the original timestep? dt -= dt_sub; - // calculate cooling again - #ifdef CLOUDY_COOL + // calculate cooling again + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - #endif + #endif // calculate new change in temperature - del_T = cool*dt*TIME_UNIT*(gamma-1.0)/(n*KB); + del_T = cool * dt * TIME_UNIT * (gamma - 1.0) / (n * KB); } // calculate final temperature T -= del_T; // adjust value of energy based on total change in temperature - del_T = T_init - T; // total change in T - E -= n*KB*del_T / ((gamma-1.0)*ENERGY_UNIT); - #ifdef DE - ge -= KB*del_T / (mu*MP*(gamma-1.0)*SP_ENERGY_UNIT); - #endif - // calculate cooling rate for new T - #ifdef CLOUDY_COOL + del_T = T_init - T; // total change in T + E -= n * KB * del_T / ((gamma - 1.0) * ENERGY_UNIT); + #ifdef DE + ge -= KB * del_T / (mu * MP * (gamma - 1.0) * SP_ENERGY_UNIT); + #endif + + // calculate cooling rate for new T + #ifdef CLOUDY_COOL cool = Cloudy_cool(n, T, coolTexObj, heatTexObj); - #else + #else cool = CIE_cool(n, T); - //printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); - #endif + // printf("%d %d %d %e %e %e\n", xid, yid, zid, n, T, cool); + #endif // and send back from kernel - dev_conserved[4*n_cells + id] = E; - #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] = d*ge; - #endif - + dev_conserved[4 * n_cells + id] = E; + #ifdef DE + dev_conserved[(n_fields - 1) * n_cells + id] = d * ge; + #endif } - } - /* \fn __device__ Real test_cool(Real n, Real T) * \brief Cooling function from Creasey 2011. 
*/ __device__ Real test_cool(int tid, Real n, Real T) { Real T0, T1, lambda, cool; - T0 = 10000.0; - T1 = 20*T0; + T0 = 10000.0; + T1 = 20 * T0; cool = 0.0; - //lambda = 5.0e-24; //cooling coefficient, 5e-24 erg cm^3 s^-1 - lambda = 5.0e-20; //cooling coefficient, 5e-24 erg cm^3 s^-1 + // lambda = 5.0e-24; //cooling coefficient, 5e-24 erg cm^3 s^-1 + lambda = 5.0e-20; // cooling coefficient, 5e-24 erg cm^3 s^-1 // constant cooling rate - //cool = n*n*lambda; + // cool = n*n*lambda; // Creasey cooling function - if (T >= T0 && T <= 0.5*(T1+T0)) { - cool = n*n*lambda*(T - T0) / T0; + if (T >= T0 && T <= 0.5 * (T1 + T0)) { + cool = n * n * lambda * (T - T0) / T0; } - if (T >= 0.5*(T1+T0) && T <= T1) { - cool = n*n*lambda*(T1 - T) / T0; + if (T >= 0.5 * (T1 + T0) && T <= T1) { + cool = n * n * lambda * (T1 - T) / T0; } - - //printf("%d %f %f\n", tid, T, cool); + // printf("%d %f %f\n", tid, T, cool); return cool; - } - /* \fn __device__ Real primordial_cool(Real n, Real T) * \brief Primordial hydrogen/helium cooling curve derived according to Katz et al. 1996. 
*/ @@ -210,74 +206,76 @@ __device__ Real primordial_cool(Real n, Real T) // set flag to 1 for photoionization & heating heat_flag = 0; - //Real X = 0.76; //hydrogen abundance by mass - Y = 0.24; //helium abundance by mass - y = Y/(4 - 4*Y); + // Real X = 0.76; //hydrogen abundance by mass + Y = 0.24; // helium abundance by mass + y = Y / (4 - 4 * Y); // set the hydrogen number density n_h = n; // calculate the recombination and collisional ionization rates // (Table 2 from Katz 1996) - alpha_hp = (8.4e-11) * (1.0/sqrt(T)) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7)))); - alpha_hep = (1.5e-10) * (pow(T,(-0.6353))); - alpha_d = (1.9e-3) * (pow(T,(-1.5))) * exp(-470000.0/T) * (1.0 + 0.3*exp(-94000.0/T)); - alpha_hepp = (3.36e-10)* (1.0/sqrt(T)) * pow((T/1e3),(-0.2)) * (1.0 / (1.0 + pow((T/1e6),(0.7)))); - gamma_eh0 = (5.85e-11)* sqrt(T) * exp(-157809.1/T) * (1.0 / (1.0 + sqrt(T/1e5))); - gamma_ehe0 = (2.38e-11)* sqrt(T) * exp(-285335.4/T) * (1.0 / (1.0 + sqrt(T/1e5))); - gamma_ehep = (5.68e-12)* sqrt(T) * exp(-631515.0/T) * (1.0 / (1.0 + sqrt(T/1e5))); + alpha_hp = (8.4e-11) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))); + alpha_hep = (1.5e-10) * (pow(T, (-0.6353))); + alpha_d = (1.9e-3) * (pow(T, (-1.5))) * exp(-470000.0 / T) * (1.0 + 0.3 * exp(-94000.0 / T)); + alpha_hepp = (3.36e-10) * (1.0 / sqrt(T)) * pow((T / 1e3), (-0.2)) * (1.0 / (1.0 + pow((T / 1e6), (0.7)))); + gamma_eh0 = (5.85e-11) * sqrt(T) * exp(-157809.1 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + gamma_ehe0 = (2.38e-11) * sqrt(T) * exp(-285335.4 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); + gamma_ehep = (5.68e-12) * sqrt(T) * exp(-631515.0 / T) * (1.0 / (1.0 + sqrt(T / 1e5))); // externally evaluated integrals for photoionization rates // assumed J(nu) = 10^-22 (nu_L/nu) - gamma_lh0 = 3.19851e-13; + gamma_lh0 = 3.19851e-13; gamma_lhe0 = 3.13029e-13; gamma_lhep = 2.00541e-14; // externally evaluated integrals for heating rates - e_h0 = 2.4796e-24; + e_h0 = 
2.4796e-24; e_he0 = 6.86167e-24; e_hep = 6.21868e-25; - // assuming no photoionization, solve equations for number density of // each species - n_e = n_h; //as a first guess, use the hydrogen number density + n_e = n_h; // as a first guess, use the hydrogen number density n_iter = 20; - diff = 1.0; - tol = 1.0e-6; + diff = 1.0; + tol = 1.0e-6; if (heat_flag) { - for (int i=0; i= 4.0 && log10(T) < 5.9) { + } else if (log10(T) >= 4.0 && log10(T) < 5.9) { lambda = pow(10.0, (-1.3 * (log10(T) - 5.25) * (log10(T) - 5.25) - 21.25)); - } - else if (log10(T) >= 5.9 && log10(T) < 7.4) { + } else if (log10(T) >= 5.9 && log10(T) < 7.4) { lambda = pow(10.0, (0.7 * (log10(T) - 7.1) * (log10(T) - 7.1) - 22.8)); - } - else { - lambda = pow(10.0, (0.45*log10(T) - 26.065)); + } else { + lambda = pow(10.0, (0.45 * log10(T) - 26.065)); } // cooling rate per unit volume - cool = n*n*lambda; + cool = n * n * lambda; return cool; - } - -#ifdef CLOUDY_COOL -/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) + #ifdef CLOUDY_COOL +/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t + coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating tables at z = 0 with solar metallicity and an HM05 UV background. 
*/ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) { - Real lambda = 0.0; //cooling rate, erg s^-1 cm^3 - Real H = 0.0; //heating rate, erg s^-1 cm^3 - Real cool = 0.0; //cooling per unit volume, erg /s / cm^3 + Real lambda = 0.0; // cooling rate, erg s^-1 cm^3 + Real H = 0.0; // heating rate, erg s^-1 cm^3 + Real cool = 0.0; // cooling per unit volume, erg /s / cm^3 float log_n, log_T; log_n = log10(n); log_T = log10(T); // remap coordinates for texture // remapped = (input - TABLE_MIN_VALUE)*(1/TABLE_SPACING) - // remapped = (input - TABLE_MIN_VALUE)*(NUM_CELLS_PER_DECADE) - log_T = (log_T - 1.0)*10; - log_n = (log_n + 6.0)*10; + // remapped = (input - TABLE_MIN_VALUE)*(NUM_CELLS_PER_DECADE) + log_T = (log_T - 1.0) * 10; + log_n = (log_n + 6.0) * 10; + + // Note: although the cloudy table columns are n,T,L,H , T is the fastest + // variable so it is treated as "x" This is why the Texture calls are T first, + // then n: Bilinear_Texture(tex, log_T, log_n) - // Note: although the cloudy table columns are n,T,L,H , T is the fastest variable so it is treated as "x" - // This is why the Texture calls are T first, then n: Bilinear_Texture(tex, log_T, log_n) - // don't cool below 10 K if (log10(T) > 1.0) { lambda = Bilinear_Texture(coolTexObj, log_T, log_n); - } - else lambda = 0.0; + } else + lambda = 0.0; H = Bilinear_Texture(heatTexObj, log_T, log_n); // cooling rate per unit volume - cool = n*n*(powf(10, lambda) - powf(10, H)); + cool = n * n * (powf(10, lambda) - powf(10, H)); // printf("DEBUG Cloudy L350: %.17e\n",cool); return cool; } -#endif //CLOUDY_COOL - - - + #endif // CLOUDY_COOL -#endif //COOLING_GPU -#endif //CUDA +#endif // COOLING_GPU diff --git a/src/cooling/cooling_cuda.h b/src/cooling/cooling_cuda.h index f8d098e59..d9105fde3 100644 --- a/src/cooling/cooling_cuda.h +++ b/src/cooling/cooling_cuda.h @@ -1,50 +1,50 @@ /*! \file cooling_cuda.h * \brief Declarations of cooling functions. 
*/ -#ifdef CUDA #ifdef COOLING_GPU -#pragma once + #pragma once -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" + #include + + #include "../global/global.h" + #include "../utils/gpu.hpp" extern cudaTextureObject_t coolTexObj; extern cudaTextureObject_t heatTexObj; -/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma) - * \brief When passed an array of conserved variables and a timestep, adjust the value - of the total energy for each cell according to the specified cooling function. */ +/*! \fn void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int + n_ghost, int n_fields, Real dt, Real gamma) + * \brief When passed an array of conserved variables and a timestep, adjust + the value of the total energy for each cell according to the specified cooling + function. */ void Cooling_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma); - -/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real dt, Real gamma) - * \brief When passed an array of conserved variables and a timestep, adjust the value - of the total energy for each cell according to the specified cooling function. */ -__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); - +/*! \fn void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int + n_ghost, Real dt, Real gamma) + * \brief When passed an array of conserved variables and a timestep, adjust + the value of the total energy for each cell according to the specified cooling + function. 
*/ +__global__ void cooling_kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, + Real gamma, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); /* \fn __device__ Real test_cool(Real n, Real T) * \brief Cooling function from Creasey 2011. */ __device__ Real test_cool(int tid, Real n, Real T); - /* \fn __device__ Real primordial_cool(Real n, Real T) * \brief Primordial hydrogen/helium cooling curve derived according to Katz et al. 1996. */ __device__ Real primordial_cool(Real n, Real T); - /* \fn __device__ Real CIE_cool(Real n, Real T) * \brief Analytic fit to a solar metallicity CIE cooling curve calculated using Cloudy. */ __device__ Real CIE_cool(Real n, Real T); - -/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +/* \fn __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t + coolTexObj, cudaTextureObject_t heatTexObj) * \brief Uses texture mapping to interpolate Cloudy cooling/heating tables at z = 0 with solar metallicity and an HM05 UV background. */ __device__ Real Cloudy_cool(Real n, Real T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj); -#endif //COOLING_GPU -#endif //CUDA +#endif // COOLING_GPU diff --git a/src/cooling/load_cloudy_texture.cu b/src/cooling/load_cloudy_texture.cu index 2d5758bbd..5e0f2d460 100644 --- a/src/cooling/load_cloudy_texture.cu +++ b/src/cooling/load_cloudy_texture.cu @@ -1,29 +1,27 @@ /*! \file load_cloudy_texture.cu * \brief Wrapper file to load cloudy cooling table as CUDA texture. 
*/ -#ifdef CUDA #ifdef CLOUDY_COOL -#include -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../cooling/load_cloudy_texture.h" -#include "../cooling/cooling_cuda.h" -#include "../cooling/texture_utilities.h" + #include + #include -#include "../io/io.h" // provides chprintf + #include "../cooling/cooling_cuda.h" + #include "../cooling/load_cloudy_texture.h" + #include "../cooling/texture_utilities.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" // provides chprintf -cudaArray* cuCoolArray; -cudaArray* cuHeatArray; +cudaArray *cuCoolArray; +cudaArray *cuHeatArray; void Test_Cloudy_Textures(); void Test_Cloudy_Speed(); - /* \fn void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) * \brief Load the Cloudy cooling tables into host (CPU) memory. */ -void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) +void Host_Read_Cooling_Tables(float *cooling_table, float *heating_table) { double *n_arr; double *T_arr; @@ -36,56 +34,51 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) FILE *infile; char buffer[0x1000]; - char * pch; + char *pch; // allocate arrays for temperature data - n_arr = (double *) malloc(nx*ny*sizeof(double)); - T_arr = (double *) malloc(nx*ny*sizeof(double)); - L_arr = (double *) malloc(nx*ny*sizeof(double)); - H_arr = (double *) malloc(nx*ny*sizeof(double)); + n_arr = (double *)malloc(nx * ny * sizeof(double)); + T_arr = (double *)malloc(nx * ny * sizeof(double)); + L_arr = (double *)malloc(nx * ny * sizeof(double)); + H_arr = (double *)malloc(nx * ny * sizeof(double)); // Read in cloudy cooling/heating curve (function of density and temperature) - i=0; + i = 0; + + const char *cloudy_filename1 = "./cloudy_coolingcurve.txt"; + const char *cloudy_filename2 = "src/cooling/cloudy_coolingcurve.txt"; + const char *file_in_use; - const char* cloudy_filename1 = "./cloudy_coolingcurve.txt"; - const char* 
cloudy_filename2 = "src/cooling/cloudy_coolingcurve.txt"; - const char* file_in_use; - - infile = fopen(cloudy_filename1, "r"); + infile = fopen(cloudy_filename1, "r"); file_in_use = cloudy_filename1; if (infile == NULL) { - infile = fopen(cloudy_filename2, "r"); + infile = fopen(cloudy_filename2, "r"); file_in_use = cloudy_filename2; } - if (infile == NULL) { - chprintf("Unable to open Cloudy file with expected relative paths:\n %s \n OR \n %s\n", cloudy_filename1, cloudy_filename2); + chprintf( + "Unable to open Cloudy file with expected relative paths:\n %s \n OR " + "\n %s\n", + cloudy_filename1, cloudy_filename2); exit(1); } else { chprintf("Using Cloudy file at relative path: %s \n", file_in_use); } - - while (fgets(buffer, sizeof(buffer), infile) != NULL) - { + while (fgets(buffer, sizeof(buffer), infile) != NULL) { if (buffer[0] == '#') { continue; - } - else { - pch = strtok(buffer, "\t"); + } else { + pch = strtok(buffer, "\t"); n_arr[i] = atof(pch); - while (pch != NULL) - { + while (pch != NULL) { pch = strtok(NULL, "\t"); - if (pch != NULL) - T_arr[i] = atof(pch); + if (pch != NULL) T_arr[i] = atof(pch); pch = strtok(NULL, "\t"); - if (pch != NULL) - L_arr[i] = atof(pch); + if (pch != NULL) L_arr[i] = atof(pch); pch = strtok(NULL, "\t"); - if (pch != NULL) - H_arr[i] = atof(pch); + if (pch != NULL) H_arr[i] = atof(pch); } i++; } @@ -93,8 +86,7 @@ void Host_Read_Cooling_Tables(float* cooling_table, float* heating_table) fclose(infile); // copy data from cooling array into the table - for (i=0; i(coolTexObj, rlog_T, rlog_n); - float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); + float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); + float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); // Hackfully print it out for processing for correctness - printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, heat); - 
+ printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n", log_T, log_n, lambda, heat); } - -/* Consider this function only to be used at the end of Load_Cuda_Textures when testing - * Evaluate texture on grid of size num_n num_T for variables n,T */ -__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, cudaTextureObject_t heatTexObj) +/* Consider this function only to be used at the end of Load_Cuda_Textures when + * testing Evaluate texture on grid of size num_n num_T for variables n,T */ +__global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject_t coolTexObj, + cudaTextureObject_t heatTexObj) { - int id,id_n,id_T; + int id, id_n, id_T; id = threadIdx.x + blockIdx.x * blockDim.x; // Calculate log_T and log_n based on id - id_T = id/num_n; - id_n = id%num_n; + id_T = id / num_n; + id_n = id % num_n; - // Min value, but include id=-1 as an outside value to check clamping. Use dx = 0.05 instead of 0.1 to check interpolation - // float log_T = 1.0 + (id_T-1)*0.05; + // Min value, but include id=-1 as an outside value to check clamping. 
Use dx + // = 0.05 instead of 0.1 to check interpolation float log_T = 1.0 + + // (id_T-1)*0.05; // float log_n = -6.0 + (id_n-1)*0.05; // Remap for texture with normalized coords @@ -245,56 +239,48 @@ __global__ void Test_Cloudy_Speed_Kernel(int num_n, int num_T, cudaTextureObject // float rlog_T = (log_T - 1.0) * 10; // float rlog_n = (log_n + 6.0) * 10; - float rlog_T = (id_T - 1)*0.0125; - float rlog_n = (id_n - 1)*0.0125; + float rlog_T = (id_T - 1) * 0.0125; + float rlog_n = (id_n - 1) * 0.0125; // Evaluate - float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); - float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); + float lambda = Bilinear_Texture(coolTexObj, rlog_T, rlog_n); // tex2D(coolTexObj, rlog_T, rlog_n); + float heat = Bilinear_Texture(heatTexObj, rlog_T, rlog_n); // tex2D(heatTexObj, rlog_T, rlog_n); // Hackfully print it out for processing for correctness - // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, heat); - + // printf("TEST_Cloudy: %.17e %.17e %.17e %.17e \n",log_T, log_n, lambda, + // heat); } -/* Consider this function only to be used at the end of Load_Cuda_Textures when testing - * Evaluate texture on grid of size num_n num_T for variables n,T */ +/* Consider this function only to be used at the end of Load_Cuda_Textures when + * testing Evaluate texture on grid of size num_n num_T for variables n,T */ void Test_Cloudy_Textures() { - int num_n = 1+2*121; - int num_T = 1+2*81; - dim3 dim1dGrid((num_n*num_T+TPB-1)/TPB, 1, 1); + int num_n = 1 + 2 * 121; + int num_T = 1 + 2 * 81; + dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel,dim1dGrid,dim1dBlock,0,0,num_n,num_T,coolTexObj,heatTexObj); - CHECK(cudaDeviceSynchronize()); + hipLaunchKernelGGL(Test_Cloudy_Textures_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); + 
GPU_Error_Check(cudaDeviceSynchronize()); printf("Exiting due to Test_Cloudy_Textures() being called \n"); exit(0); } void Test_Cloudy_Speed() { - int num_n = 1+80*121; - int num_T = 1+80*81; - dim3 dim1dGrid((num_n*num_T+TPB-1)/TPB, 1, 1); + int num_n = 1 + 80 * 121; + int num_T = 1 + 80 * 81; + dim3 dim1dGrid((num_n * num_T + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - CHECK(cudaDeviceSynchronize()); - Real time_start = get_time(); - for (int i=0; i<100; i++) { - hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel,dim1dGrid,dim1dBlock,0,0,num_n,num_T,coolTexObj,heatTexObj); + GPU_Error_Check(cudaDeviceSynchronize()); + Real time_start = Get_Time(); + for (int i = 0; i < 100; i++) { + hipLaunchKernelGGL(Test_Cloudy_Speed_Kernel, dim1dGrid, dim1dBlock, 0, 0, num_n, num_T, coolTexObj, heatTexObj); } - CHECK(cudaDeviceSynchronize()); - Real time_end = get_time(); + GPU_Error_Check(cudaDeviceSynchronize()); + Real time_end = Get_Time(); printf(" Cloudy Test Time %9.4f micro-s \n", (time_end - time_start)); printf("Exiting due to Test_Cloudy_Speed() being called \n"); exit(0); } - - - - - - - -#endif -#endif +#endif // CLOUDY_COOL diff --git a/src/cooling/load_cloudy_texture.h b/src/cooling/load_cloudy_texture.h index 164125392..7d6307f71 100644 --- a/src/cooling/load_cloudy_texture.h +++ b/src/cooling/load_cloudy_texture.h @@ -1,21 +1,19 @@ /*! \file load_cloudy_texture.h * \brief Wrapper file to load cloudy cooling table as CUDA texture. */ -#ifdef CUDA #ifdef CLOUDY_COOL -#pragma once + #pragma once -#include "../global/global.h" + #include "../global/global.h" /* \fn void Load_Cuda_Textures() * \brief Load the Cloudy cooling tables into texture memory on the GPU. */ void Load_Cuda_Textures(); /* \fn void Free_Cuda_Textures() - * \brief Unbind the texture memory on the GPU, and free the associated Cuda arrays. */ + * \brief Unbind the texture memory on the GPU, and free the associated Cuda + * arrays. 
*/ void Free_Cuda_Textures(); -#endif -#endif - +#endif // CLOUDY_COOL diff --git a/src/cooling/texture_utilities.h b/src/cooling/texture_utilities.h index 6b271d5a1..fc335bcf7 100644 --- a/src/cooling/texture_utilities.h +++ b/src/cooling/texture_utilities.h @@ -1,23 +1,22 @@ /*! \file texture_utilities.h * \brief Declarations of functions needed for textures. */ -// WARNING: do not include this header file in any .cpp file or any .h file that would be included into a .cpp file -// because tex2D is undefined when compiling with gcc. +// WARNING: do not include this header file in any .cpp file or any .h file that +// would be included into a .cpp file because tex2D is undefined when compiling +// with gcc. -#ifdef CUDA #pragma once -#include "../utils/gpu.hpp" #include + #include "../global/global.h" +#include "../utils/gpu.hpp" -inline __device__ float lerp(float v0, float v1, float f) -{ - return fma(f, v1, fma(-f,v0,v0)); -} +inline __device__ float lerp(float v0, float v1, float f) { return fma(f, v1, fma(-f, v0, v0)); } /* \fn float Bilinear_Texture(cudaTextureObject_t tex, float x, float y) - \brief Access texture values from tex at coordinates (x,y) using bilinear interpolation + \brief Access texture values from tex at coordinates (x,y) using bilinear + interpolation */ inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float y) { @@ -27,18 +26,16 @@ inline __device__ float Bilinear_Texture(cudaTextureObject_t tex, float x, float float fx = x - px; float fy = y - py; - // 0.5 offset is necessary to represent half-pixel offset built into texture coordinates + // 0.5 offset is necessary to represent half-pixel offset built into texture + // coordinates px += 0.5; py += 0.5; - float t00 = tex2D(tex,px,py); - float t01 = tex2D(tex,px,py+1); - float t10 = tex2D(tex,px+1,py); - float t11 = tex2D(tex,px+1,py+1); + float t00 = tex2D(tex, px, py); + float t01 = tex2D(tex, px, py + 1); + float t10 = tex2D(tex, px + 1, py); + float t11 = 
tex2D(tex, px + 1, py + 1); // The inner lerps interpolate along x // The outer lerp interpolates along y return lerp(lerp(t00, t10, fx), lerp(t01, t11, fx), fy); - } - -#endif //CUDA diff --git a/src/cooling_grackle/cool_grackle.cpp b/src/cooling_grackle/cool_grackle.cpp index 4392feefd..a7f5c36cb 100644 --- a/src/cooling_grackle/cool_grackle.cpp +++ b/src/cooling_grackle/cool_grackle.cpp @@ -1,184 +1,180 @@ #ifdef COOLING_GRACKLE + #include "../cooling_grackle/cool_grackle.h" -#include -#include -#include -#include "../io/io.h" -#include "../cooling_grackle/cool_grackle.h" + #include + #include + #include + #include "../grid/grid_enum.h" + #include "../io/io.h" +Cool_GK::Cool_GK(void) {} -Cool_GK::Cool_GK( void ){} +void Grid3D::Initialize_Grackle(struct Parameters *P) +{ + chprintf("Initializing Grackle... \n"); -void Grid3D::Initialize_Grackle( struct parameters *P ){ - - chprintf( "Initializing Grackle... \n"); - - Cool.Initialize( P, Cosmo ); + Cool.Initialize(P, Cosmo); Allocate_Memory_Grackle(); Initialize_Fields_Grackle(); - chprintf( "Grackle Initialized Successfully. \n\n"); - - + chprintf("Grackle Initialized Successfully. 
\n\n"); } - -void Cool_GK::Initialize( struct parameters *P, Cosmology &Cosmo ){ - - chprintf( " Using Grackle for chemistry and cooling \n" ); - chprintf( " N scalar fields: %d \n", NSCALARS ); +void Cool_GK::Initialize(struct Parameters *P, Cosmology &Cosmo) +{ + chprintf(" Using Grackle for chemistry and cooling \n"); + chprintf(" N scalar fields: %d \n", NSCALARS); grackle_verbose = 1; #ifdef MPI_CHOLLA // Enable output - if (procID != 0 ) grackle_verbose = 0; + if (procID != 0) grackle_verbose = 0; #endif - tiny_number = 1.e-20; - gamma = P->gamma; + gamma = P->gamma; - dens_conv = Cosmo.rho_0_gas; - energy_conv = Cosmo.v_0_gas * Cosmo.v_0_gas ; + dens_conv = Cosmo.rho_0_gas; + energy_conv = Cosmo.v_0_gas * Cosmo.v_0_gas; Real Msun = MSUN_CGS; - Real kpc = KPC_CGS; - Real km = KM_CGS - + Real kpc = KPC_CGS; + Real km = KM_CGS - dens_to_CGS = dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h; - vel_to_CGS = km; - energy_to_CGS = km * km; + dens_to_CGS = dens_conv * Msun / kpc / kpc / kpc * Cosmo.cosmo_h * Cosmo.cosmo_h; + vel_to_CGS = km; + energy_to_CGS = km * km; // First, set up the units system. // These are conversions from code units to cgs. - units.comoving_coordinates = 1; // 1 if cosmological sim, 0 if not - units.a_units = 1.0 ; // units for the expansion factor - units.a_value = Cosmo.current_a / units.a_units; - units.density_units = dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a ; - units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a; - units.time_units = KPC / Cosmo.cosmo_h ; - units.velocity_units = units.length_units / Cosmo.current_a / units.time_units; // since u = a * dx/dt - - // Second, create a chemistry object for parameters. This needs to be a pointer. 
+ units.comoving_coordinates = 1; // 1 if cosmological sim, 0 if not + units.a_units = 1.0; // units for the expansion factor + units.a_value = Cosmo.current_a / units.a_units; + units.density_units = dens_to_CGS / Cosmo.current_a / Cosmo.current_a / Cosmo.current_a; + units.length_units = kpc / Cosmo.cosmo_h * Cosmo.current_a; + units.time_units = KPC / Cosmo.cosmo_h; + units.velocity_units = units.length_units / Cosmo.current_a / units.time_units; // since u = a * dx/dt + + // Second, create a chemistry object for parameters. This needs to be a + // pointer. data = new chemistry_data; if (set_default_chemistry_parameters(data) == 0) { - chprintf( "GRACKLE: Error in set_default_chemistry_parameters.\n"); - exit(-1) ; + chprintf("GRACKLE: Error in set_default_chemistry_parameters.\n"); + exit(-1); } // Set parameter values for chemistry. // Access the parameter storage with the struct you've created // or with the grackle_data pointer declared in grackle.h (see further below). - data->use_grackle = 1; // chemistry on - data->with_radiative_cooling = 1; // Cooling on - data->primordial_chemistry = 1; // molecular network with H, He - data->UVbackground = 1; // UV background on - // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012.h5"; // data file - // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012_cloudy.h5"; // data file - // data->grackle_data_file = "src/cooling_grackle/CloudyData_UVB=Puchwein2018_cloudy.h5"; // data file - data->grackle_data_file = P->UVB_rates_file; // data file - // data->grackle_data_file = "src/cooling/CloudyData_UVB=FG2011.h5"; // data file - data->use_specific_heating_rate = 0; + data->use_grackle = 1; // chemistry on + data->with_radiative_cooling = 1; // Cooling on + data->primordial_chemistry = 1; // molecular network with H, He + data->UVbackground = 1; // UV background on + // data->grackle_data_file = "src/cooling/CloudyData_UVB=HM2012.h5"; // data + // file data->grackle_data_file = + // 
"src/cooling/CloudyData_UVB=HM2012_cloudy.h5"; // data file + // data->grackle_data_file = + // "src/cooling_grackle/CloudyData_UVB=Puchwein2018_cloudy.h5"; // data file + data->grackle_data_file = P->UVB_rates_file; // data file + // data->grackle_data_file = "src/cooling/CloudyData_UVB=FG2011.h5"; // data + // file + data->use_specific_heating_rate = 0; data->use_volumetric_heating_rate = 0; - data->cmb_temperature_floor = 1; + data->cmb_temperature_floor = 1; #ifdef GRACKLE_METALS - data->metal_cooling = 1; // metal cooling off + data->metal_cooling = 1; // metal cooling off #else - chprintf( "WARNING: Metal Cooling is Off. \n" ); - data->metal_cooling = 0; // metal cooling off + chprintf("WARNING: Metal Cooling is Off. \n"); + data->metal_cooling = 0; // metal cooling off #endif #ifdef PARALLEL_OMP data->omp_nthreads = N_OMP_THREADS_GRACKLE; #endif - if ( data->UVbackground == 1) chprintf( "GRACKLE: Loading UV Background File: %s\n", data->grackle_data_file ); + if (data->UVbackground == 1) chprintf("GRACKLE: Loading UV Background File: %s\n", data->grackle_data_file); // Finally, initialize the chemistry object. if (initialize_chemistry_data(&units) == 0) { - chprintf( "GRACKLE: Error in initialize_chemistry_data.\n"); - exit(-1) ; + chprintf("GRACKLE: Error in initialize_chemistry_data.\n"); + exit(-1); } - if ( data->UVbackground == 1){ - scale_factor_UVB_on = 1 / (data->UVbackground_redshift_on + 1 ); - chprintf( "GRACKLE: UVB on: %f \n", scale_factor_UVB_on ); + if (data->UVbackground == 1) { + scale_factor_UVB_on = 1 / (data->UVbackground_redshift_on + 1); + chprintf("GRACKLE: UVB on: %f \n", scale_factor_UVB_on); } - } -void Grid3D::Allocate_Memory_Grackle( ){ - -int n_cells = H.nx * H.ny * H.nz; -int nx = Grav.nx_local; -int ny = Grav.ny_local; -int nz = Grav.nz_local; -// Set grid dimension and size. 
-Cool.field_size = n_cells; -Cool.fields.grid_rank = 3; -Cool.fields.grid_dimension = new int[3]; -Cool.fields.grid_start = new int[3]; -Cool.fields.grid_end = new int[3]; -Cool.fields.grid_dimension[0] = H.nx; // the active dimension -Cool.fields.grid_dimension[1] = H.ny; // the active dimension -Cool.fields.grid_dimension[2] = H.nz; // the active dimension -// grid_start and grid_end are used to ignore ghost zones. -Cool.fields.grid_start[0] = H.n_ghost; -Cool.fields.grid_start[1] = H.n_ghost; -Cool.fields.grid_start[2] = H.n_ghost; -Cool.fields.grid_end[0] = H.nx - H.n_ghost - 1 ; -Cool.fields.grid_end[1] = H.ny - H.n_ghost - 1 ; -Cool.fields.grid_end[2] = H.nz - H.n_ghost - 1 ; - -Cool.fields.grid_dx = 0.0; // used only for H2 self-shielding approximation - -Cool.fields.density = C.density; -Cool.fields.internal_energy = (Real *) malloc(Cool.field_size * sizeof(Real)); -// Cool.fields.x_velocity = (Real *) malloc(Cool.field_size * sizeof(Real)); -// Cool.fields.y_velocity = (Real *) malloc(Cool.field_size * sizeof(Real)); -// Cool.fields.z_velocity = (Real *) malloc(Cool.field_size * sizeof(Real)); -Cool.fields.x_velocity = NULL; -Cool.fields.y_velocity = NULL; -Cool.fields.z_velocity = NULL; - - -chprintf( " Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); -Cool.fields.HI_density = &C.scalar[ 0*n_cells ]; -Cool.fields.HII_density = &C.scalar[ 1*n_cells ]; -Cool.fields.HeI_density = &C.scalar[ 2*n_cells ]; -Cool.fields.HeII_density = &C.scalar[ 3*n_cells ]; -Cool.fields.HeIII_density = &C.scalar[ 4*n_cells ]; -Cool.fields.e_density = &C.scalar[ 5*n_cells ]; - -#ifdef GRACKLE_METALS -chprintf( " Allocating memory for: metal density\n"); -Cool.fields.metal_density = &C.scalar[ 6*n_cells ]; -#else -Cool.fields.metal_density = NULL; -#endif +void Grid3D::Allocate_Memory_Grackle() +{ + int n_cells = H.nx * H.ny * H.nz; + int nx = Grav.nx_local; + int ny = Grav.ny_local; + int nz = Grav.nz_local; + // Set grid dimension and size. 
+ Cool.field_size = n_cells; + Cool.fields.grid_rank = 3; + Cool.fields.grid_dimension = new int[3]; + Cool.fields.grid_start = new int[3]; + Cool.fields.grid_end = new int[3]; + Cool.fields.grid_dimension[0] = H.nx; // the active dimension + Cool.fields.grid_dimension[1] = H.ny; // the active dimension + Cool.fields.grid_dimension[2] = H.nz; // the active dimension + // grid_start and grid_end are used to ignore ghost zones. + Cool.fields.grid_start[0] = H.n_ghost; + Cool.fields.grid_start[1] = H.n_ghost; + Cool.fields.grid_start[2] = H.n_ghost; + Cool.fields.grid_end[0] = H.nx - H.n_ghost - 1; + Cool.fields.grid_end[1] = H.ny - H.n_ghost - 1; + Cool.fields.grid_end[2] = H.nz - H.n_ghost - 1; + + Cool.fields.grid_dx = 0.0; // used only for H2 self-shielding approximation + + Cool.fields.density = C.density; + Cool.fields.internal_energy = (Real *)malloc(Cool.field_size * sizeof(Real)); + // Cool.fields.x_velocity = (Real *) malloc(Cool.field_size * + // sizeof(Real)); Cool.fields.y_velocity = (Real *) + // malloc(Cool.field_size * sizeof(Real)); Cool.fields.z_velocity = (Real + // *) malloc(Cool.field_size * sizeof(Real)); + Cool.fields.x_velocity = NULL; + Cool.fields.y_velocity = NULL; + Cool.fields.z_velocity = NULL; + + chprintf(" Allocating memory for: HI, HII, HeI, HeII, HeIII, e densities\n"); + Cool.fields.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + Cool.fields.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + Cool.fields.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + Cool.fields.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + Cool.fields.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + Cool.fields.e_density = &C.host[H.n_cells * grid_enum::e_density]; -#ifdef OUTPUT_TEMPERATURE -Cool.temperature = (Real *) malloc(Cool.field_size * sizeof(Real)); -#endif -} + #ifdef GRACKLE_METALS + chprintf(" Allocating memory for: metal density\n"); + Cool.fields.metal_density = &C.host[H.n_cells * 
grid_enum::metal_density]; + #else + Cool.fields.metal_density = NULL; + #endif + #ifdef OUTPUT_TEMPERATURE + Cool.temperature = (Real *)malloc(Cool.field_size * sizeof(Real)); + #endif +} -void Cool_GK::Free_Memory( ){ +void Cool_GK::Free_Memory() +{ // free( fields.x_velocity ); // free( fields.y_velocity ); // free( fields.z_velocity ); - free( fields.internal_energy ); + free(fields.internal_energy); #ifdef OUTPUT_TEMPERATURE - free( temperature ); + free(temperature); #endif - } #endif - diff --git a/src/cooling_grackle/cool_grackle.h b/src/cooling_grackle/cool_grackle.h index c1fab3812..0014f7e75 100644 --- a/src/cooling_grackle/cool_grackle.h +++ b/src/cooling_grackle/cool_grackle.h @@ -1,18 +1,17 @@ #ifdef COOLING_GRACKLE -#ifndef INIT_GRACKLE_H -#define INIT_GRACKLE_H + #ifndef INIT_GRACKLE_H + #define INIT_GRACKLE_H -#include "../global/global.h" + #include "../global/global.h" extern "C" { -#include + #include } class Cool_GK { - public: - + public: code_units units; chemistry_data *data; @@ -27,9 +26,9 @@ class Cool_GK Real temperature_units; - #ifdef OUTPUT_TEMPERATURE + #ifdef OUTPUT_TEMPERATURE Real *temperature; - #endif + #endif Real tiny_number; @@ -39,17 +38,15 @@ class Cool_GK grackle_field_data fields; int field_size; + Cool_GK(void); -Cool_GK( void ); - -void Initialize( struct parameters *P, Cosmology &Cosmo ); + void Initialize(struct Parameters *P, Cosmology &Cosmo); -void Free_Memory(); -// void Do_Cooling_Step( Real dt ); - -Real Get_Mean_Molecular_Weight( int cell_id ); + void Free_Memory(); + // void Do_Cooling_Step( Real dt ); + Real Get_Mean_Molecular_Weight(int cell_id); }; -#endif + #endif #endif diff --git a/src/cooling_grackle/grackle_functions.cpp b/src/cooling_grackle/grackle_functions.cpp index 6e1b48ed6..d68281c3d 100644 --- a/src/cooling_grackle/grackle_functions.cpp +++ b/src/cooling_grackle/grackle_functions.cpp @@ -1,124 +1,120 @@ #ifdef COOLING_GRACKLE -#include -#include -#include -#include "../io/io.h" -#include 
"../cooling_grackle/cool_grackle.h" - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif - - - + #include + #include + #include + #include "../cooling_grackle/cool_grackle.h" + #include "../io/io.h" -void Grid3D::Initialize_Fields_Grackle(){ + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +void Grid3D::Initialize_Fields_Grackle() +{ int nx_g, ny_g, nz_g, nx, ny, nz, nGHST; - nx_g = H.nx; - ny_g = H.ny; - nz_g = H.nz; - nx = H.nx_real; - ny = H.ny_real; - nz = H.nz_real; + nx_g = H.nx; + ny_g = H.ny; + nz_g = H.nz; + nx = H.nx_real; + ny = H.ny_real; + nz = H.nz_real; nGHST = H.n_ghost; Real d, vx, vy, vz, E, Ekin, GE, U; bool flag_DE; int i, j, k, i_g, j_g, k_g, id; - for (k=0; kH0; + cosmo_h = H0 / 100; + H0 /= 1000; //[km/s / kpc] + Omega_M = P->Omega_M; + Omega_L = P->Omega_L; + Omega_K = 1 - (Omega_M + Omega_L); + Omega_b = P->Omega_b; - chprintf( "Cosmological Simulation\n"); - - H0 = P-> H0; - cosmo_h = H0/100; - H0 /= 1000; //[km/s / kpc] - Omega_M = P-> Omega_M; - Omega_L = P-> Omega_L; - Omega_K = 1 - ( Omega_M + Omega_L ); - Omega_b = P-> Omega_b; - - if(strcmp(P->init, "Read_Grid")==0){ + if (strcmp(P->init, "Read_Grid") == 0) { // Read scale factor value from Particles current_z = Particles.current_z; current_a = Particles.current_a; - } - else{ - current_z = P->Init_redshift; - current_a = 1. / ( current_z + 1 ); + } else { + current_z = P->Init_redshift; + current_a = 1. / (current_z + 1); Particles.current_z = current_z; Particles.current_a = current_a; } - // Set Scale factor in Gravity Grav.current_a = current_a; @@ -42,53 +39,39 @@ void Cosmology::Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Pa Grav.Gconst = cosmo_G; max_delta_a = 0.001; - delta_a = max_delta_a; + delta_a = max_delta_a; // Initialize Time and set the time conversion - t_secs = 0; + t_secs = 0; time_conversion = KPC; - // Set Normalization factors - r_0_dm = P->xlen/P->nx; - t_0_dm = 1. 
/ H0; - v_0_dm = r_0_dm / t_0_dm / cosmo_h; - rho_0_dm = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_M /cosmo_h/cosmo_h; - rho_mean_baryon = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_b /cosmo_h/cosmo_h; + r_0_dm = P->xlen / P->nx; + t_0_dm = 1. / H0; + v_0_dm = r_0_dm / t_0_dm / cosmo_h; + rho_0_dm = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h; + rho_mean_baryon = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_b / cosmo_h / cosmo_h; // dens_avrg = 0; - r_0_gas = 1.0; - rho_0_gas = 3*H0*H0 / ( 8*M_PI*cosmo_G ) * Omega_M /cosmo_h/cosmo_h; - t_0_gas = 1/H0*cosmo_h; - v_0_gas = r_0_gas / t_0_gas; + r_0_gas = 1.0; + rho_0_gas = 3 * H0 * H0 / (8 * M_PI * cosmo_G) * Omega_M / cosmo_h / cosmo_h; + t_0_gas = 1 / H0 * cosmo_h; + v_0_gas = r_0_gas / t_0_gas; phi_0_gas = v_0_gas * v_0_gas; - p_0_gas = rho_0_gas * v_0_gas * v_0_gas; - e_0_gas = v_0_gas * v_0_gas; - - chprintf( " H0: %f\n", H0 * 1000 ); - chprintf( " Omega_L: %f\n", Omega_L ); - chprintf( " Omega_M: %f\n", Omega_M ); - chprintf( " Omega_b: %f\n", Omega_b ); - chprintf( " Current_a: %f\n", current_a ); - chprintf( " Current_z: %f\n", current_z ); - chprintf( " rho_0: %f\n", rho_0_gas ); - chprintf( " v_0: %f \n", v_0_gas ); - chprintf( " Max delta_a: %f \n", MAX_DELTA_A); - - Set_Scale_Outputs( P ); - + p_0_gas = rho_0_gas * v_0_gas * v_0_gas; + e_0_gas = v_0_gas * v_0_gas; + + chprintf(" H0: %f\n", H0 * 1000); + chprintf(" Omega_L: %f\n", Omega_L); + chprintf(" Omega_M: %f\n", Omega_M); + chprintf(" Omega_b: %f\n", Omega_b); + chprintf(" Current_a: %f\n", current_a); + chprintf(" Current_z: %f\n", current_z); + chprintf(" rho_0: %f\n", rho_0_gas); + chprintf(" v_0: %f \n", v_0_gas); + chprintf(" Max delta_a: %f \n", MAX_DELTA_A); + + Set_Scale_Outputs(P); } - - - - - - - - - - - - #endif diff --git a/src/cosmology/cosmology.h b/src/cosmology/cosmology.h index b45e904b1..1e7c9bd1c 100644 --- a/src/cosmology/cosmology.h +++ b/src/cosmology/cosmology.h @@ -1,19 +1,19 @@ #ifdef COSMOLOGY -#ifndef COSMOLOGY_H 
-#define COSMOLOGY_H + #ifndef COSMOLOGY_H + #define COSMOLOGY_H -#include -#include -#include "../global/global.h" -#include "../particles/particles_3D.h" -#include "../gravity/grav3D.h" + #include + #include + + #include "../global/global.h" + #include "../gravity/grav3D.h" + #include "../particles/particles_3D.h" class Cosmology { -public: - + public: Real H0; Real Omega_M; Real Omega_L; @@ -54,21 +54,19 @@ class Cosmology Real next_output; bool exit_now; + Cosmology(void); + void Initialize(struct Parameters *P, Grav3D &Grav, Particles3D &Particles); - Cosmology( void ); - void Initialize( struct parameters *P, Grav3D &Grav, Particles_3D &Particles ); - - void Load_Scale_Outputs( struct parameters *P ); - void Set_Scale_Outputs( struct parameters *P ); + void Load_Scale_Outputs(struct Parameters *P); + void Set_Scale_Outputs(struct Parameters *P); - void Set_Next_Scale_Output( ); + void Set_Next_Scale_Output(); - Real Get_Hubble_Parameter( Real a ); - - Real Get_da_from_dt( Real dt ); - Real Get_dt_from_da( Real da ); + Real Get_Hubble_Parameter(Real a); + Real Get_da_from_dt(Real dt); + Real Get_dt_from_da(Real da); }; -#endif + #endif #endif diff --git a/src/cosmology/cosmology_functions.cpp b/src/cosmology/cosmology_functions.cpp index c1ceb8299..f00c7e174 100644 --- a/src/cosmology/cosmology_functions.cpp +++ b/src/cosmology/cosmology_functions.cpp @@ -1,133 +1,133 @@ #ifdef COSMOLOGY + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../grid/grid_enum.h" + #include "../io/io.h" -#include "../grid/grid3D.h" -#include "../global/global.h" -#include "../io/io.h" - - - -void Grid3D::Initialize_Cosmology( struct parameters *P ){ - - chprintf( "Initializing Cosmology... \n"); - Cosmo.Initialize( P, Grav, Particles ); +void Grid3D::Initialize_Cosmology(struct Parameters *P) +{ + chprintf("Initializing Cosmology... 
\n"); + Cosmo.Initialize(P, Grav, Particles); // Change to comoving Cosmological System - Change_Cosmological_Frame_Sytem( true ); + Change_Cosmological_Frame_Sytem(true); - if ( fabs( Cosmo.current_a - Cosmo.next_output ) < 1e-5 ) H.Output_Now = true; - - chprintf( "Cosmology Successfully Initialized. \n\n"); + if (fabs(Cosmo.current_a - Cosmo.next_output) < 1e-5) { + H.Output_Now = true; + } + chprintf("Cosmology Successfully Initialized. \n\n"); } -Real Cosmology::Get_da_from_dt( Real dt ){ - Real a2 = current_a * current_a; - Real a_dot = sqrt( Omega_M/current_a + a2*Omega_L + Omega_K ) * H0 ; +Real Cosmology::Get_da_from_dt(Real dt) +{ + Real a2 = current_a * current_a; + Real a_dot = sqrt(Omega_M / current_a + a2 * Omega_L + Omega_K) * H0; return a_dot * dt; } -Real Cosmology::Get_dt_from_da( Real da ){ - Real a2 = current_a * current_a; - Real a_dot = sqrt( Omega_M/current_a + a2*Omega_L + Omega_K ) * H0 ; +Real Cosmology::Get_dt_from_da(Real da) +{ + Real a2 = current_a * current_a; + Real a_dot = sqrt(Omega_M / current_a + a2 * Omega_L + Omega_K) * H0; return da / a_dot; } -Real Cosmology::Get_Hubble_Parameter( Real a ){ - Real a2 = a * a; - Real a3 = a2 * a; - Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L ); +Real Cosmology::Get_Hubble_Parameter(Real a) +{ + Real a2 = a * a; + Real a3 = a2 * a; + Real factor = (Omega_M / a3 + Omega_K / a2 + Omega_L); return H0 * sqrt(factor); } -void Grid3D::Change_Cosmological_Frame_Sytem( bool forward ){ - - if (forward) chprintf( " Converting to Cosmological Comoving System\n"); - else chprintf( " Converting to Cosmological Physical System\n"); +void Grid3D::Change_Cosmological_Frame_Sytem(bool forward) +{ + if (forward) { + chprintf(" Converting to Cosmological Comoving System\n"); + } else { + chprintf(" Converting to Cosmological Physical System\n"); + } - Change_DM_Frame_System( forward ); + Change_DM_Frame_System(forward); #ifndef ONLY_PARTICLES - Change_GAS_Frame_System_GPU( forward ); + 
Change_GAS_Frame_System_GPU(forward); - Change_GAS_Frame_System( forward ); - #endif//ONLY_PARTICLES + Change_GAS_Frame_System(forward); + #endif // ONLY_PARTICLES } -void Grid3D::Change_DM_Frame_System( bool forward ){ - +void Grid3D::Change_DM_Frame_System(bool forward) +{ #ifdef PARTICLES_CPU part_int_t pIndx; Real vel_factor; vel_factor = 1; - - for ( pIndx=0; pIndx= nx || tid_y >= ny || tid_z >= nz ) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) { + return; + } - tid_grid = tid_x + tid_y*nx + tid_z*nx*ny; + tid_grid = tid_x + tid_y * nx + tid_z * nx * ny; - density_d[tid_grid] = density_d[tid_grid] * dens_factor; + density_d[tid_grid] = density_d[tid_grid] * dens_factor; momentum_x_d[tid_grid] = momentum_x_d[tid_grid] * momentum_factor; momentum_y_d[tid_grid] = momentum_y_d[tid_grid] * momentum_factor; momentum_z_d[tid_grid] = momentum_z_d[tid_grid] * momentum_factor; - Energy_d[tid_grid] = Energy_d[tid_grid] * energy_factor; + Energy_d[tid_grid] = Energy_d[tid_grid] * energy_factor; #ifdef DE - GasEnergy_d[tid_grid] = GasEnergy_d[tid_grid] * energy_factor; + GasEnergy_d[tid_grid] = GasEnergy_d[tid_grid] * energy_factor; #endif - //NOTE If CHEMISTRY_GPU I need to add the conversion for the chemical species here - + // NOTE If CHEMISTRY_GPU I need to add the conversion for the chemical species + // here } - -void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){ - +void Grid3D::Change_GAS_Frame_System_GPU(bool forward) +{ Real dens_factor, momentum_factor, energy_factor; - if ( forward ){ - dens_factor = 1 / Cosmo.rho_0_gas; + if (forward) { + dens_factor = 1 / Cosmo.rho_0_gas; momentum_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas * Cosmo.current_a; - energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a; - } - else{ - dens_factor = Cosmo.rho_0_gas; - momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a; - energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / 
Cosmo.current_a / Cosmo.current_a; + energy_factor = 1 / Cosmo.rho_0_gas / Cosmo.v_0_gas / Cosmo.v_0_gas * Cosmo.current_a * Cosmo.current_a; + } else { + dens_factor = Cosmo.rho_0_gas; + momentum_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas / Cosmo.current_a; + energy_factor = Cosmo.rho_0_gas * Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; } int nx, ny, nz; @@ -60,9 +58,9 @@ void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){ nz = H.nz; // set values for GPU kernels - int tpb_x = TPBX_COSMO; - int tpb_y = TPBY_COSMO; - int tpb_z = TPBZ_COSMO; + int tpb_x = TPBX_COSMO; + int tpb_y = TPBY_COSMO; + int tpb_z = TPBZ_COSMO; int ngrid_x = (nx - 1) / tpb_x + 1; int ngrid_y = (ny - 1) / tpb_y + 1; int ngrid_z = (nz - 1) / tpb_z + 1; @@ -78,12 +76,9 @@ void Grid3D::Change_GAS_Frame_System_GPU( bool forward ){ GasEnergy_d = NULL; #endif - hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, 0, dens_factor, momentum_factor, energy_factor, nx, ny, nz, - C.d_density, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, C.d_Energy, GasEnergy_d ); - + hipLaunchKernelGGL(Change_GAS_Frame_System_kernel, dim3dGrid, dim3dBlock, 0, 0, dens_factor, momentum_factor, + energy_factor, nx, ny, nz, C.d_density, C.d_momentum_x, C.d_momentum_y, C.d_momentum_z, C.d_Energy, + GasEnergy_d); } - - - -#endif //COSMOLOGY +#endif // COSMOLOGY diff --git a/src/cosmology/cosmology_functions_gpu.h b/src/cosmology/cosmology_functions_gpu.h index ced300114..092e13bdf 100644 --- a/src/cosmology/cosmology_functions_gpu.h +++ b/src/cosmology/cosmology_functions_gpu.h @@ -1,18 +1,15 @@ -#if defined(COSMOLOGY) +#if defined(COSMOLOGY) + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" -#include "../grid/grid3D.h" -#include "../global/global.h" -#include "../io/io.h" -#include "../utils/gpu.hpp" + #define TPBX_COSMO 16 + #define TPBY_COSMO 8 + #define TPBZ_COSMO 8 -#define TPBX_COSMO 16 -#define 
TPBY_COSMO 8 -#define TPBZ_COSMO 8 +// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real +// Omega_L, Real Omega_K ); -// __device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K ); - - - - -#endif //COSMOLOGY +#endif // COSMOLOGY diff --git a/src/cosmology/io_cosmology.cpp b/src/cosmology/io_cosmology.cpp index c4f9aa029..7492a814c 100644 --- a/src/cosmology/io_cosmology.cpp +++ b/src/cosmology/io_cosmology.cpp @@ -1,36 +1,33 @@ #ifdef COSMOLOGY -#include -#include -#include "../cosmology/cosmology.h" -#include "../io/io.h" + #include + #include -using namespace std; - - -void Cosmology::Load_Scale_Outputs( struct parameters *P ) { + #include "../cosmology/cosmology.h" + #include "../io/io.h" +void Cosmology::Load_Scale_Outputs(struct Parameters *P) +{ char filename_1[100]; // create the filename to read from strcpy(filename_1, P->scale_outputs_file); - chprintf( " Loading Scale_Factor Outpus: %s\n", filename_1); + chprintf(" Loading Scale_Factor Outpus: %s\n", filename_1); - ifstream file_out ( filename_1 ); - string line; + std::ifstream file_out(filename_1); + std::string line; Real a_value; - if (file_out.is_open()){ - while ( getline (file_out,line) ){ - a_value = atof( line.c_str() ); - scale_outputs.push_back( a_value ); + if (file_out.is_open()) { + while (getline(file_out, line)) { + a_value = atof(line.c_str()); + scale_outputs.push_back(a_value); n_outputs += 1; // chprintf("%f\n", a_value); } file_out.close(); - n_outputs = scale_outputs.size(); + n_outputs = scale_outputs.size(); next_output_indx = 0; chprintf(" Loaded %d scale outputs \n", n_outputs); - } - else{ + } else { chprintf(" Error: Unable to open cosmology outputs file\n"); exit(1); } @@ -38,62 +35,56 @@ void Cosmology::Load_Scale_Outputs( struct parameters *P ) { chprintf(" Setting next snapshot output\n"); int scale_indx = next_output_indx; - a_value = scale_outputs[scale_indx]; + a_value = scale_outputs[scale_indx]; - 
while ( (current_a - a_value) > 1e-3 ){ + while ((current_a - a_value) > 1e-3) { // chprintf( "%f %f\n", a_value, current_a); scale_indx += 1; a_value = scale_outputs[scale_indx]; } next_output_indx = scale_indx; - next_output = a_value; - chprintf(" Next output index: %d \n", next_output_indx ); - chprintf(" Next output z value: %f \n", 1./next_output - 1 ); + next_output = a_value; + chprintf(" Next output index: %d \n", next_output_indx); + chprintf(" Next output z value: %f \n", 1. / next_output - 1); exit_now = false; - } -void Cosmology::Set_Scale_Outputs( struct parameters *P ){ - - if ( P->scale_outputs_file[0] == '\0' ){ - chprintf( " Output every %d timesteps.\n", P->n_steps_output ); - Real scale_end = 1 / ( P->End_redshift + 1); - scale_outputs.push_back( current_a ); - scale_outputs.push_back( scale_end ); - n_outputs = scale_outputs.size(); +void Cosmology::Set_Scale_Outputs(struct Parameters *P) +{ + if (P->scale_outputs_file[0] == '\0') { + chprintf(" Output every %d timesteps.\n", P->n_steps_output); + Real scale_end = 1 / (P->End_redshift + 1); + scale_outputs.push_back(current_a); + scale_outputs.push_back(scale_end); + n_outputs = scale_outputs.size(); next_output_indx = 0; - next_output = current_a; - chprintf(" Next output index: %d \n", next_output_indx ); - chprintf(" Next output z value: %f \n", 1./next_output - 1 ); + next_output = current_a; + chprintf(" Next output index: %d \n", next_output_indx); + chprintf(" Next output z value: %f \n", 1. / next_output - 1); + } else { + Load_Scale_Outputs(P); } - else Load_Scale_Outputs( P ); - - - } - -void Cosmology::Set_Next_Scale_Output( ){ - - +void Cosmology::Set_Next_Scale_Output() +{ int scale_indx = next_output_indx; - Real a_value = scale_outputs[scale_indx]; - // chprintf("Setting next output index. Current index: %d n_outputs: %d ", scale_indx, n_outputs); + Real a_value = scale_outputs[scale_indx]; + // chprintf("Setting next output index. 
Current index: %d n_outputs: %d ", + // scale_indx, n_outputs); - // if ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx = 1; + // if ( ( scale_indx == 0 ) && ( abs(a_value - current_a )<1e-5 ) )scale_indx + // = 1; scale_indx += 1; - if ( scale_indx < n_outputs ){ - a_value = scale_outputs[scale_indx]; + if (scale_indx < n_outputs) { + a_value = scale_outputs[scale_indx]; next_output_indx = scale_indx; - next_output = a_value; - } - else{ + next_output = a_value; + } else { exit_now = true; } - } - #endif diff --git a/src/dust/dust_cuda.cu b/src/dust/dust_cuda.cu new file mode 100644 index 000000000..8b72facdf --- /dev/null +++ b/src/dust/dust_cuda.cu @@ -0,0 +1,136 @@ +/*! + * \file dust_cuda.cu + * \author Helena Richie (helenarichie@gmail.com) + * \brief Contains code that updates the dust density scalar field. The dust_kernel function determines the rate of + * change of dust density, which is controlled by the sputtering timescale. The sputtering timescale is from the + * McKinnon et al. (2017) model of dust sputtering, which depends on the cell's gas density and temperature. 
+ */ + +#ifdef DUST + + // STL includes + #include + + #include + #include + #include + + // Local includes + #include "../dust/dust_cuda.h" + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../grid/grid_enum.h" + #include "../utils/cuda_utilities.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" + +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius) +{ + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Dust_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dt, gamma, + grain_radius); + GPU_Error_Check(); +} + +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius) +{ + // get grid indices + int n_cells = nx * ny * nz; + int is, ie, js, je, ks, ke; + cuda_utilities::Get_Real_Indices(n_ghost, nx, ny, nz, is, ie, js, je, ks, ke); + // get a global thread ID + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int id = threadIdx.x + blockId * blockDim.x; + int id_z = id / (nx * ny); + int id_y = (id - id_z * nx * ny) / nx; + int id_x = id - id_z * nx * ny - id_y * nx; + + // define physics variables + Real density_gas, density_dust; // fluid mass densities + Real number_density; // gas number density + Real mu = 0.6; // mean molecular weight + + // define integration variables + Real dd_dt; // instantaneous rate of change in dust density + Real dd; // change in dust density at current timestep + Real dd_max = 0.01; // allowable percentage of dust density increase + Real dt_sub; // refined timestep + + if (id_x >= is && id_x < ie && id_y >= js && id_y < je && id_z >= ks && id_z < ke) { + // get conserved quanitites + density_gas = dev_conserved[id + n_cells * grid_enum::density]; + 
density_dust = dev_conserved[id + n_cells * grid_enum::dust_density]; + + // convert mass density to number density + number_density = density_gas * DENSITY_UNIT / (mu * MP); + + // Compute the temperature + #ifdef DE + Real const gas_energy = dev_conserved[id + n_cells * grid_enum::GasEnergy]; + Real const temperature = hydro_utilities::Calc_Temp_DE(gas_energy, gamma, number_density); + #else // DE is not enabled + Real const energy = dev_conserved[id + n_cells * grid_enum::Energy]; + Real const momentum_x = dev_conserved[id + n_cells * grid_enum::momentum_x]; + Real const momentum_y = dev_conserved[id + n_cells * grid_enum::momentum_y]; + Real const momentum_z = dev_conserved[id + n_cells * grid_enum::momentum_z]; + + #ifdef MHD + auto const [magnetic_x, magnetic_y, magnetic_z] = + mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); + Real const temperature = + hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, momentum_z, gamma, + number_density, magnetic_x, magnetic_y, magnetic_z); + #else // MHD is not defined + Real const temperature = hydro_utilities::Calc_Temp_Conserved(energy, density_gas, momentum_x, momentum_y, + momentum_z, gamma, number_density); + #endif // MHD + #endif // DE + + Real tau_sp = Calc_Sputtering_Timescale(number_density, temperature, grain_radius) / + TIME_UNIT; // sputtering timescale, kyr (sim units) + + dd_dt = Calc_dd_dt(density_dust, tau_sp); // rate of change in dust density at current timestep + dd = dd_dt * dt; // change in dust density at current timestep + + // ensure that dust density is not changing too rapidly + while (dd / density_dust > dd_max) { + dt_sub = dd_max * density_dust / dd_dt; + density_dust += dt_sub * dd_dt; + dt -= dt_sub; + dd_dt = Calc_dd_dt(density_dust, tau_sp); + dd = dt * dd_dt; + } + + // update dust density + density_dust += dd; + + dev_conserved[id + n_cells * grid_enum::dust_density] = density_dust; + } +} + +// McKinnon et al. 
(2017) sputtering timescale +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature, Real grain_radius) +{ + Real a = grain_radius; // dust grain size in units of 0.1 micrometers + Real temperature_0 = 2e6; // temp above which the sputtering rate is ~constant in K + Real omega = 2.5; // controls the low-temperature scaling of the sputtering rate + Real A = 5.3618e15; // 0.17 Gyr in s + + number_density /= (6e-4); // gas number density in units of 10^-27 g/cm^3 + + // sputtering timescale, s + Real tau_sp = A * (a / number_density) * (pow(temperature_0 / temperature, omega) + 1); + + return tau_sp; +} + +// McKinnon et al. (2017) sputtering model +__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp) { return -density_dust / (tau_sp / 3); } + +#endif // DUST diff --git a/src/dust/dust_cuda.h b/src/dust/dust_cuda.h new file mode 100644 index 000000000..212901e8a --- /dev/null +++ b/src/dust/dust_cuda.h @@ -0,0 +1,70 @@ +/*! + * \file dust_cuda.h + * \author Helena Richie (helenarichie@pitt.edu) + * \brief Contains the declaration for the kernel that updates the dust density scalar in dev_conserved. + * + */ + +#ifdef DUST + + #ifndef DUST_CUDA_H + #define DUST_CUDA_H + + #include + + #include "../global/global.h" + #include "../utils/gpu.hpp" + +/*! + * \brief Launch the dust kernel. + * + * \param[in,out] dev_conserved The device conserved variable array. + * \param[in] nx Number of cells in the x-direction + * \param[in] ny Number of cells in the y-direction + * \param[in] nz Number of cells in the z-direction + * \param[in] n_ghost Number of ghost cells + * \param[in] n_fields Number of fields in dev_conserved + * \param[in] dt Simulation timestep + * \param[in] gamma Specific heat ratio + */ +void Dust_Update(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius); + +/*! 
+ * \brief Compute the change in dust density for a cell and update its value in dev_conserved. + * + * \param[in,out] dev_conserved The device conserved variable array. The dust field is updated in this function. If dual + * energy is turned on, then the dual energy field is updated, as well. + * \param[in] nx Number of cells in the x-direction + * \param[in] ny Number of cells in the y-direction + * \param[in] nz Number of cells in the z-direction + * \param[in] n_ghost Number of ghost cells + * \param[in] n_fields Number of fields in dev_conserved + * \param[in] dt Simulation timestep + * \param[in] gamma Specific heat ratio + */ +__global__ void Dust_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dt, Real gamma, + Real grain_radius); + +/*! + * \brief Compute the sputtering timescale based on a cell's density and temperature. + * + * \param[in] number_density Gas number density in cm^-3 + * \param[in] temperature Gas temperature in K + * + * \return Real Sputtering timescale in seconds (McKinnon et al. 2017) + */ +__device__ __host__ Real Calc_Sputtering_Timescale(Real number_density, Real temperature, Real grain_radius); + +/*! + * \brief Compute the rate of change in dust density based on the current dust density and sputtering timescale. + * + * \param[in] density_dust Dust mass density in M_sun/kpc^3 + * \param[in] tau_sp Sputtering timescale in kyr + * + * \return Real Dust density rate of change (McKinnon et al. 2017) + */ +__device__ __host__ Real Calc_dd_dt(Real density_dust, Real tau_sp); + + #endif // DUST_CUDA_H +#endif // DUST \ No newline at end of file diff --git a/src/dust/dust_cuda_tests.cpp b/src/dust/dust_cuda_tests.cpp new file mode 100644 index 000000000..5b59b2dc0 --- /dev/null +++ b/src/dust/dust_cuda_tests.cpp @@ -0,0 +1,72 @@ +/*! + * \file dust_cuda_tests.cpp + * \author Helena Richie (helenarichie@gmail.com) + * \brief Tests for dust model functions. 
+ */ + +// STL Includes +#include + +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../dust/dust_cuda.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" + +#ifdef DUST + +TEST(tDUSTTestSputteringTimescale, CorrectInputExpectCorrectOutput) +{ + // Parameters + Real YR_IN_S = 3.154e7; + Real const k_test_number_density = 1; + Real const k_test_temperature = pow(10, 5.0); + Real const k_test_grain_radius = 1; + Real const k_fiducial_num = 182565146.96398282; + + Real test_num = + Calc_Sputtering_Timescale(k_test_number_density, k_test_temperature, k_test_grain_radius) / YR_IN_S; // yr + + double abs_diff; + int64_t ulps_diff; + + bool is_true; + + is_true = testing_utilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + + EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num << std::endl + << "The test value is: " << test_num << std::endl + << "The absolute difference is: " << abs_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; +} + +TEST(tDUSTTestSputteringGrowthRate, CorrectInputExpectCorrectOutput) +{ + // Parameters + Real YR_IN_S = 3.154e7; + Real const k_test_tau_sp = 0.17e6; // kyr + Real const k_test_density_dust = 1e-26 / DENSITY_UNIT; // sim units + Real const k_fiducial_num = -2.6073835738056728; + + Real test_num = Calc_dd_dt(k_test_density_dust, k_test_tau_sp); + + double abs_diff; + int64_t ulps_diff; + + bool is_true; + + is_true = testing_utilities::nearlyEqualDbl(k_fiducial_num, test_num, abs_diff, ulps_diff); + + EXPECT_TRUE(is_true) << "The fiducial value is: " << k_fiducial_num << std::endl + << "The test value is: " << test_num << std::endl + << "The absolute difference is: " << abs_diff << std::endl + << "The ULP difference is: " << ulps_diff << std::endl; +} + +#endif // DUST \ No newline at end of file diff --git a/src/global/global.cpp 
b/src/global/global.cpp index 1f6a5cbfa..64eac0d5b 100644 --- a/src/global/global.cpp +++ b/src/global/global.cpp @@ -1,415 +1,493 @@ /* \file global.cpp * \brief Global function definitions.*/ +#include "../global/global.h" +#include #include -#include #include #include #include +#include + #include -#include -#include "../global/global.h" -#include "../io/io.h" //defines chprintf + +#include "../io/io.h" //defines chprintf +#include "../utils/error_handling.h" // defines ASSERT /* Global variables */ -Real gama; // Ratio of specific heats -Real C_cfl; // CFL number +Real gama; // Ratio of specific heats +Real C_cfl; // CFL number #ifdef PARTICLES -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA // Constants for the inital size of the buffers for particles transfer // and the number of data transferred for each particle int N_PARTICLES_TRANSFER; int N_DATA_PER_PARTICLE_TRANSFER; + #endif #endif -#endif - /*! \fn void Set_Gammas(Real gamma_in) * \brief Set gamma values for Riemann solver */ void Set_Gammas(Real gamma_in) { - //set gamma - gama = gamma_in; - + // set gamma + gama = gamma_in; + CHOLLA_ASSERT(gama > 1.0, "Gamma must be greater than one."); } - -/*! \fn double get_time(void) +/*! \fn double Get_Time(void) * \brief Returns the current clock time. */ -double get_time(void) +double Get_Time(void) { struct timeval timer; - gettimeofday(&timer,NULL); - return timer.tv_sec + 1.0e-6*timer.tv_usec; + gettimeofday(&timer, NULL); + return timer.tv_sec + 1.0e-6 * timer.tv_usec; } -/*! \fn int sgn +/*! \fn int Sgn * \brief Mathematical sign function. Returns sign of x. */ -int sgn(Real x) +int Sgn(Real x) { - if (x < 0) return -1; - else return 1; + if (x < 0) { + return -1; + } else { + return 1; + } } -#ifndef CUDA -/*! \fn Real calc_eta(Real cW[], Real gamma) - * \brief Calculate the eta value for the H correction. 
*/ -Real calc_eta(Real cW[], Real gamma) -{ - Real pl, pr, al, ar; - - pl = (cW[8] - 0.5*(cW[2]*cW[2] + cW[4]*cW[4] + cW[6]*cW[6])/cW[0]) * (gamma-1.0); - pl = fmax(pl, TINY_NUMBER); - pr = (cW[9] - 0.5*(cW[3]*cW[3] + cW[5]*cW[5] + cW[7]*cW[7])/cW[1]) * (gamma-1.0); - pr = fmax(pr, TINY_NUMBER); - - al = sqrt(gamma*pl/cW[0]); - ar = sqrt(gamma*pr/cW[1]); +// global mpi-related variables (they are declared here because they are initialized even when +// the MPI_CHOLLA variable is not defined) - return 0.5*fabs((cW[3]/cW[1] + ar) - (cW[2]/cW[0]-al)); +int procID; /*process rank*/ +int nproc; /*number of processes in global comm*/ +int root; /*rank of root process*/ +/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables + * that still meaningful in non-mpi simulations. + */ +void Init_Global_Parallel_Vars_No_MPI() +{ +#ifdef MPI_CHOLLA + CHOLLA_ERROR("This function should not be executed when compiled with MPI"); +#endif + procID = 0; + nproc = 1; + root = 0; } -#endif //NO CUDA - -/*! \fn char trim(char *s) +/*! \fn char Trim(char *s) * \brief Gets rid of trailing and leading whitespace. 
*/ -char *trim (char * s) +char *Trim(char *s) { /* Initialize start, end pointers */ - char *s1 = s, *s2 = &s[strlen (s) - 1]; + char *s1 = s, *s2 = &s[strlen(s) - 1]; /* Trim and delimit right side */ - while ( (isspace (*s2)) && (s2 >= s1) ) + while ((isspace(*s2)) && (s2 >= s1)) { s2--; - *(s2+1) = '\0'; + } + *(s2 + 1) = '\0'; /* Trim left side */ - while ( (isspace (*s1)) && (s1 < s2) ) + while ((isspace(*s1)) && (s1 < s2)) { s1++; + } /* Copy finished string */ - strcpy (s, s1); + strcpy(s, s1); return s; } -const std::set optionalParams = {"flag_delta", "ddelta_dt", "n_delta", - "Lz" , "Lx" , "phi" , "theta", "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", - "Init_redshift", "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z" }; +// NOLINTNEXTLINE(cert-err58-cpp) +const std::set optionalParams = { + "flag_delta", "ddelta_dt", "n_delta", "Lz", "Lx", "phi", "theta", + "delta", "nzr", "nxr", "H0", "Omega_M", "Omega_L", "Init_redshift", + "End_redshift", "tile_length", "n_proc_x", "n_proc_y", "n_proc_z"}; -/*! \fn int is_param_valid(char *name); - * \brief Verifies that a param is valid (even if not needed). Avoids "warnings" in output. */ -int is_param_valid(const char* param_name) { - for (auto it=optionalParams.begin(); it != optionalParams.end(); ++it) { - if (strcmp(param_name, *it) == 0) return 1; +/*! \fn int Is_Param_Valid(char *name); + * \brief Verifies that a param is valid (even if not needed). Avoids + * "warnings" in output. */ +int Is_Param_Valid(const char *param_name) +{ + // for (auto optionalParam = optionalParams.begin(); optionalParam != optionalParams.end(); ++optionalParam) { + for (const auto *optionalParam : optionalParams) { + if (strcmp(param_name, optionalParam) == 0) { + return 1; + } } return 0; } -void parse_param(char *name,char *value, struct parameters *parms); +void Parse_Param(char *name, char *value, struct Parameters *parms); - -/*! \fn void parse_params(char *param_file, struct parameters * parms); +/*! 
\fn void Parse_Params(char *param_file, struct Parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -void parse_params (char *param_file, struct parameters * parms, int argc, char** argv) +void Parse_Params(char *param_file, struct Parameters *parms, int argc, char **argv) { int buf; char *s, buff[256]; - FILE *fp = fopen (param_file, "r"); - if (fp == NULL) - { + FILE *fp = fopen(param_file, "r"); + if (fp == NULL) { chprintf("Exiting at file %s line %d: failed to read param file %s \n", __FILE__, __LINE__, param_file); exit(1); return; } - // set default hydro file output parameter - parms->n_hydro=1; - parms->n_particle=1; - parms->n_slice=1; - parms->n_projection=1; - parms->n_rotated_projection=1; - -#ifdef ROTATED_PROJECTION - //initialize rotation parameters to zero - parms->delta = 0; - parms->theta = 0; - parms->phi = 0; - parms->n_delta = 0; - parms->ddelta_dt = 0; - parms->flag_delta = 0; -#endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY -//Initialize file name as an empty string -parms->scale_outputs_file[0] = '\0'; + // Initialize file name as an empty string + parms->scale_outputs_file[0] = '\0'; #endif - /* Read next line */ - while ((s = fgets (buff, sizeof buff, fp)) != NULL) - { + while ((s = fgets(buff, sizeof buff, fp)) != NULL) { /* Skip blank lines and comments */ - if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') + if (buff[0] == '\n' || buff[0] == '#' || buff[0] == ';') { continue; + } /* Parse name/value pair from line */ char name[MAXLEN], value[MAXLEN]; - s = strtok (buff, "="); - if (s==NULL) + s = strtok(buff, "="); + if (s == NULL) { continue; - else - strncpy (name, s, MAXLEN); - s = strtok (NULL, "="); - if (s==NULL) + } else { + strncpy(name, s, MAXLEN); + } + s = strtok(NULL, "="); + if (s == NULL) { continue; - else - strncpy (value, s, MAXLEN); - trim (value); - parse_param(name,value,parms); + } else { + strncpy(value, s, MAXLEN); + } + Trim(value); + Parse_Param(name, value, parms); } 
/* Close file */ - fclose (fp); + fclose(fp); // Parse overriding args from command line for (int i = 0; i < argc; ++i) { char name[MAXLEN], value[MAXLEN]; - s = strtok (argv[i], "="); - if (s==NULL) + s = strtok(argv[i], "="); + if (s == NULL) { continue; - else - strncpy (name, s, MAXLEN); - s = strtok (NULL, "="); - if (s==NULL) + } else { + strncpy(name, s, MAXLEN); + } + s = strtok(NULL, "="); + if (s == NULL) { continue; - else - strncpy (value, s, MAXLEN); - parse_param(name,value,parms); - chprintf("Override with %s=%s\n",name,value); - + } else { + strncpy(value, s, MAXLEN); + } + Parse_Param(name, value, parms); + chprintf("Override with %s=%s\n", name, value); } } -/*! \fn void parse_param(char *name,char *value, struct parameters *parms); +/*! \fn void Parse_Param(char *name,char *value, struct Parameters *parms); * \brief Parses and sets a single param based on name and value. */ -void parse_param(char *name,char *value, struct parameters *parms){ +void Parse_Param(char *name, char *value, struct Parameters *parms) +{ /* Copy into correct entry in parameters struct */ - if (strcmp(name, "nx")==0) + if (strcmp(name, "nx") == 0) { parms->nx = atoi(value); - else if (strcmp(name, "ny")==0) + } else if (strcmp(name, "ny") == 0) { parms->ny = atoi(value); - else if (strcmp(name, "nz")==0) + } else if (strcmp(name, "nz") == 0) { parms->nz = atoi(value); - else if (strcmp(name, "tout")==0) +#ifdef STATIC_GRAV + } else if (strcmp(name, "custom_grav") == 0) { + parms->custom_grav = atoi(value); +#endif + } else if (strcmp(name, "tout") == 0) { parms->tout = atof(value); - else if (strcmp(name, "outstep")==0) + } else if (strcmp(name, "outstep") == 0) { parms->outstep = atof(value); - else if (strcmp(name, "n_steps_output")==0) + } else if (strcmp(name, "n_steps_output") == 0) { parms->n_steps_output = atoi(value); - else if (strcmp(name, "gamma")==0) + } else if (strcmp(name, "gamma") == 0) { parms->gamma = atof(value); - else if (strcmp(name, "init")==0) - 
strncpy (parms->init, value, MAXLEN); - else if (strcmp(name, "nfile")==0) + } else if (strcmp(name, "init") == 0) { + strncpy(parms->init, value, MAXLEN); + } else if (strcmp(name, "nfile") == 0) { parms->nfile = atoi(value); - else if (strcmp(name, "n_hydro")==0) + } else if (strcmp(name, "n_hydro") == 0) { parms->n_hydro = atoi(value); - else if (strcmp(name, "n_particle")==0) + } else if (strcmp(name, "n_particle") == 0) { parms->n_particle = atoi(value); - else if (strcmp(name, "n_projection")==0) + } else if (strcmp(name, "n_projection") == 0) { parms->n_projection = atoi(value); - else if (strcmp(name, "n_rotated_projection")==0) + } else if (strcmp(name, "n_rotated_projection") == 0) { parms->n_rotated_projection = atoi(value); - else if (strcmp(name, "n_slice")==0) + } else if (strcmp(name, "n_slice") == 0) { parms->n_slice = atoi(value); - else if (strcmp(name, "n_out_float32")==0) + } else if (strcmp(name, "n_out_float32") == 0) { parms->n_out_float32 = atoi(value); - else if (strcmp(name, "out_float32_density")==0) + } else if (strcmp(name, "out_float32_density") == 0) { parms->out_float32_density = atoi(value); - else if (strcmp(name, "out_float32_momentum_x")==0) + } else if (strcmp(name, "out_float32_momentum_x") == 0) { parms->out_float32_momentum_x = atoi(value); - else if (strcmp(name, "out_float32_momentum_y")==0) + } else if (strcmp(name, "out_float32_momentum_y") == 0) { parms->out_float32_momentum_y = atoi(value); - else if (strcmp(name, "out_float32_momentum_z")==0) + } else if (strcmp(name, "out_float32_momentum_z") == 0) { parms->out_float32_momentum_z = atoi(value); - else if (strcmp(name, "out_float32_Energy")==0) + } else if (strcmp(name, "out_float32_Energy") == 0) { parms->out_float32_Energy = atoi(value); #ifdef DE - else if (strcmp(name, "out_float32_GasEnergy")==0) + } else if (strcmp(name, "out_float32_GasEnergy") == 0) { parms->out_float32_GasEnergy = atoi(value); -#endif // DE +#endif // DE #ifdef MHD - else if (strcmp(name, 
"out_float32_magnetic_x")==0) + } else if (strcmp(name, "out_float32_magnetic_x") == 0) { parms->out_float32_magnetic_x = atoi(value); - else if (strcmp(name, "out_float32_magnetic_y")==0) + } else if (strcmp(name, "out_float32_magnetic_y") == 0) { parms->out_float32_magnetic_y = atoi(value); - else if (strcmp(name, "out_float32_magnetic_z")==0) + } else if (strcmp(name, "out_float32_magnetic_z") == 0) { parms->out_float32_magnetic_z = atoi(value); -#endif // MHD - else if (strcmp(name, "xmin")==0) +#endif // MHD + } else if (strcmp(name, "output_always") == 0) { + int tmp = atoi(value); + // In this case the CHOLLA_ASSERT macro runs into issuse with the readability-simplify-boolean-expr clang-tidy check + // due to some weird macro expansion stuff. That check has been disabled here for now but in clang-tidy 18 the + // IgnoreMacro option should be used instead. + // NOLINTNEXTLINE(readability-simplify-boolean-expr) + CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "output_always must be 1 or 0."); + parms->output_always = tmp; + } else if (strcmp(name, "legacy_flat_outdir") == 0) { + int tmp = atoi(value); + CHOLLA_ASSERT((tmp == 0) or (tmp == 1), "legacy_flat_outdir must be 1 or 0."); + parms->legacy_flat_outdir = tmp; + } else if (strcmp(name, "xmin") == 0) { parms->xmin = atof(value); - else if (strcmp(name, "ymin")==0) + } else if (strcmp(name, "ymin") == 0) { parms->ymin = atof(value); - else if (strcmp(name, "zmin")==0) + } else if (strcmp(name, "zmin") == 0) { parms->zmin = atof(value); - else if (strcmp(name, "xlen")==0) + } else if (strcmp(name, "xlen") == 0) { parms->xlen = atof(value); - else if (strcmp(name, "ylen")==0) + } else if (strcmp(name, "ylen") == 0) { parms->ylen = atof(value); - else if (strcmp(name, "zlen")==0) + } else if (strcmp(name, "zlen") == 0) { parms->zlen = atof(value); - else if (strcmp(name, "xl_bcnd")==0) + } else if (strcmp(name, "xl_bcnd") == 0) { parms->xl_bcnd = atoi(value); - else if (strcmp(name, "xu_bcnd")==0) + } else if 
(strcmp(name, "xu_bcnd") == 0) { parms->xu_bcnd = atoi(value); - else if (strcmp(name, "yl_bcnd")==0) + } else if (strcmp(name, "yl_bcnd") == 0) { parms->yl_bcnd = atoi(value); - else if (strcmp(name, "yu_bcnd")==0) + } else if (strcmp(name, "yu_bcnd") == 0) { parms->yu_bcnd = atoi(value); - else if (strcmp(name, "zl_bcnd")==0) + } else if (strcmp(name, "zl_bcnd") == 0) { parms->zl_bcnd = atoi(value); - else if (strcmp(name, "zu_bcnd")==0) + } else if (strcmp(name, "zu_bcnd") == 0) { parms->zu_bcnd = atoi(value); - else if (strcmp(name, "custom_bcnd")==0) - strncpy (parms->custom_bcnd, value, MAXLEN); - else if (strcmp(name, "outdir")==0) - strncpy (parms->outdir, value, MAXLEN); - else if (strcmp(name, "indir")==0) - strncpy (parms->indir, value, MAXLEN); - else if (strcmp(name, "rho")==0) + } else if (strcmp(name, "custom_bcnd") == 0) { + strncpy(parms->custom_bcnd, value, MAXLEN); + } else if (strcmp(name, "outdir") == 0) { + strncpy(parms->outdir, value, MAXLEN); + } else if (strcmp(name, "indir") == 0) { + strncpy(parms->indir, value, MAXLEN); + } else if (strcmp(name, "rho") == 0) { parms->rho = atof(value); - else if (strcmp(name, "vx")==0) + } else if (strcmp(name, "vx") == 0) { parms->vx = atof(value); - else if (strcmp(name, "vy")==0) + } else if (strcmp(name, "vy") == 0) { parms->vy = atof(value); - else if (strcmp(name, "vz")==0) + } else if (strcmp(name, "vz") == 0) { parms->vz = atof(value); - else if (strcmp(name, "P")==0) + } else if (strcmp(name, "P") == 0) { parms->P = atof(value); - else if (strcmp(name, "Bx")==0) + } else if (strcmp(name, "Bx") == 0) { parms->Bx = atof(value); - else if (strcmp(name, "By")==0) + } else if (strcmp(name, "By") == 0) { parms->By = atof(value); - else if (strcmp(name, "Bz")==0) + } else if (strcmp(name, "Bz") == 0) { parms->Bz = atof(value); - else if (strcmp(name, "A")==0) + } else if (strcmp(name, "A") == 0) { parms->A = atof(value); - else if (strcmp(name, "rho_l")==0) + } else if (strcmp(name, "rho_l") == 0) { 
parms->rho_l = atof(value); - else if (strcmp(name, "vx_l")==0) + } else if (strcmp(name, "vx_l") == 0) { parms->vx_l = atof(value); - else if (strcmp(name, "vy_l")==0) + } else if (strcmp(name, "vy_l") == 0) { parms->vy_l = atof(value); - else if (strcmp(name, "vz_l")==0) + } else if (strcmp(name, "vz_l") == 0) { parms->vz_l = atof(value); - else if (strcmp(name, "P_l")==0) + } else if (strcmp(name, "P_l") == 0) { parms->P_l = atof(value); - else if (strcmp(name, "Bx_l")==0) + } else if (strcmp(name, "Bx_l") == 0) { parms->Bx_l = atof(value); - else if (strcmp(name, "By_l")==0) + } else if (strcmp(name, "By_l") == 0) { parms->By_l = atof(value); - else if (strcmp(name, "Bz_l")==0) + } else if (strcmp(name, "Bz_l") == 0) { parms->Bz_l = atof(value); - else if (strcmp(name, "rho_r")==0) + } else if (strcmp(name, "rho_r") == 0) { parms->rho_r = atof(value); - else if (strcmp(name, "vx_r")==0) + } else if (strcmp(name, "vx_r") == 0) { parms->vx_r = atof(value); - else if (strcmp(name, "vy_r")==0) + } else if (strcmp(name, "vy_r") == 0) { parms->vy_r = atof(value); - else if (strcmp(name, "vz_r")==0) + } else if (strcmp(name, "vz_r") == 0) { parms->vz_r = atof(value); - else if (strcmp(name, "P_r")==0) + } else if (strcmp(name, "P_r") == 0) { parms->P_r = atof(value); - else if (strcmp(name, "Bx_r")==0) + } else if (strcmp(name, "Bx_r") == 0) { parms->Bx_r = atof(value); - else if (strcmp(name, "By_r")==0) + } else if (strcmp(name, "By_r") == 0) { parms->By_r = atof(value); - else if (strcmp(name, "Bz_r")==0) + } else if (strcmp(name, "Bz_r") == 0) { parms->Bz_r = atof(value); - else if (strcmp(name, "diaph")==0) + } else if (strcmp(name, "diaph") == 0) { parms->diaph = atof(value); + } else if (strcmp(name, "rEigenVec_rho") == 0) { + parms->rEigenVec_rho = atof(value); + } else if (strcmp(name, "rEigenVec_MomentumX") == 0) { + parms->rEigenVec_MomentumX = atof(value); + } else if (strcmp(name, "rEigenVec_MomentumY") == 0) { + parms->rEigenVec_MomentumY = atof(value); 
+ } else if (strcmp(name, "rEigenVec_MomentumZ") == 0) { + parms->rEigenVec_MomentumZ = atof(value); + } else if (strcmp(name, "rEigenVec_E") == 0) { + parms->rEigenVec_E = atof(value); + } else if (strcmp(name, "rEigenVec_Bx") == 0) { + parms->rEigenVec_Bx = atof(value); + } else if (strcmp(name, "rEigenVec_By") == 0) { + parms->rEigenVec_By = atof(value); + } else if (strcmp(name, "rEigenVec_Bz") == 0) { + parms->rEigenVec_Bz = atof(value); + } else if (strcmp(name, "pitch") == 0) { + parms->pitch = atof(value); + } else if (strcmp(name, "yaw") == 0) { + parms->yaw = atof(value); + } else if (strcmp(name, "polarization") == 0) { + parms->polarization = atof(value); + } else if (strcmp(name, "radius") == 0) { + parms->radius = atof(value); + } else if (strcmp(name, "P_blast") == 0) { + parms->P_blast = atof(value); + } else if (strcmp(name, "wave_length") == 0) { + parms->wave_length = atof(value); #ifdef PARTICLES - else if (strcmp(name, "prng_seed")==0) + } else if (strcmp(name, "prng_seed") == 0) { parms->prng_seed = atoi(value); -#endif // PARTICLES +#endif // PARTICLES +#ifdef SUPERNOVA + } else if (strcmp(name, "snr_filename") == 0) { + strncpy(parms->snr_filename, value, MAXLEN); +#endif #ifdef ROTATED_PROJECTION - else if (strcmp(name, "nxr")==0) + } else if (strcmp(name, "nxr") == 0) { parms->nxr = atoi(value); - else if (strcmp(name, "nzr")==0) + } else if (strcmp(name, "nzr") == 0) { parms->nzr = atoi(value); - else if (strcmp(name, "delta")==0) + } else if (strcmp(name, "delta") == 0) { parms->delta = atof(value); - else if (strcmp(name, "theta")==0) + } else if (strcmp(name, "theta") == 0) { parms->theta = atof(value); - else if (strcmp(name, "phi")==0) + } else if (strcmp(name, "phi") == 0) { parms->phi = atof(value); - else if (strcmp(name, "Lx")==0) - parms->Lx = atof(value); - else if (strcmp(name, "Lz")==0) + } else if (strcmp(name, "Lx") == 0) { + parms->Lx = atof(value); + } else if (strcmp(name, "Lz") == 0) { parms->Lz = atof(value); - else if 
(strcmp(name, "n_delta")==0) + } else if (strcmp(name, "n_delta") == 0) { parms->n_delta = atoi(value); - else if (strcmp(name, "ddelta_dt")==0) + } else if (strcmp(name, "ddelta_dt") == 0) { parms->ddelta_dt = atof(value); - else if (strcmp(name, "flag_delta")==0) - parms->flag_delta = atoi(value); + } else if (strcmp(name, "flag_delta") == 0) { + parms->flag_delta = atoi(value); #endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY - else if (strcmp(name, "scale_outputs_file")==0) - strncpy (parms->scale_outputs_file, value, MAXLEN); - else if (strcmp(name, "Init_redshift")==0) - parms->Init_redshift = atof(value); - else if (strcmp(name, "End_redshift")==0) - parms->End_redshift = atof(value); - else if (strcmp(name, "H0")==0) - parms->H0 = atof(value); - else if (strcmp(name, "Omega_M")==0) - parms->Omega_M = atof(value); - else if (strcmp(name, "Omega_L")==0) - parms->Omega_L = atof(value); - else if (strcmp(name, "Omega_b")==0) - parms->Omega_b = atof(value); -#endif //COSMOLOGY + } else if (strcmp(name, "scale_outputs_file") == 0) { + strncpy(parms->scale_outputs_file, value, MAXLEN); + } else if (strcmp(name, "Init_redshift") == 0) { + parms->Init_redshift = atof(value); + } else if (strcmp(name, "End_redshift") == 0) { + parms->End_redshift = atof(value); + } else if (strcmp(name, "H0") == 0) { + parms->H0 = atof(value); + } else if (strcmp(name, "Omega_M") == 0) { + parms->Omega_M = atof(value); + } else if (strcmp(name, "Omega_L") == 0) { + parms->Omega_L = atof(value); + } else if (strcmp(name, "Omega_b") == 0) { + parms->Omega_b = atof(value); +#endif // COSMOLOGY #ifdef TILED_INITIAL_CONDITIONS - else if (strcmp(name, "tile_length")==0) - parms->tile_length = atof(value); -#endif //TILED_INITIAL_CONDITIONS + } else if (strcmp(name, "tile_length") == 0) { + parms->tile_length = atof(value); +#endif // TILED_INITIAL_CONDITIONS #ifdef SET_MPI_GRID - // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] - else if (strcmp(name, "n_proc_x")==0) - 
parms->n_proc_x = atoi(value); - else if (strcmp(name, "n_proc_y")==0) - parms->n_proc_y = atoi(value); - else if (strcmp(name, "n_proc_z")==0) - parms->n_proc_z = atoi(value); + // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] + } else if (strcmp(name, "n_proc_x") == 0) { + parms->n_proc_x = atoi(value); + } else if (strcmp(name, "n_proc_y") == 0) { + parms->n_proc_y = atoi(value); + } else if (strcmp(name, "n_proc_z") == 0) { + parms->n_proc_z = atoi(value); #endif - else if (strcmp(name, "bc_potential_type")==0) - parms->bc_potential_type = atoi(value); + } else if (strcmp(name, "bc_potential_type") == 0) { + parms->bc_potential_type = atoi(value); #ifdef CHEMISTRY_GPU - else if (strcmp(name, "UVB_rates_file")==0) - strncpy (parms->UVB_rates_file, value, MAXLEN); + } else if (strcmp(name, "UVB_rates_file") == 0) { + strncpy(parms->UVB_rates_file, value, MAXLEN); #endif #ifdef COOLING_GRACKLE - else if (strcmp(name, "UVB_rates_file")==0) - strncpy (parms->UVB_rates_file, value, MAXLEN); + } else if (strcmp(name, "UVB_rates_file") == 0) { + strncpy(parms->UVB_rates_file, value, MAXLEN); +#endif +#ifdef TEMPERATURE_FLOOR + } else if (strcmp(name, "temperature_floor") == 0) { + parms->temperature_floor = atof(value); + if (parms->temperature_floor == 0) { + chprintf( + "WARNING: temperature floor is set to its default value (zero)! It can be set to a different value in the " + "input parameter file.\n"); + } +#endif +#ifdef DENSITY_FLOOR + } else if (strcmp(name, "density_floor") == 0) { + parms->density_floor = atof(value); + if (parms->density_floor == 0) { + chprintf( + "WARNING: density floor is set to its default value (zero)! It can be set to a different value in the input " + "parameter file.\n"); + } +#endif +#ifdef SCALAR_FLOOR + } else if (strcmp(name, "scalar_floor") == 0) { + parms->scalar_floor = atof(value); + if (parms->scalar_floor == 0) { + chprintf( + "WARNING: scalar floor is set to its default value (zero)! 
It can be set to a different value in the input " + "parameter file.\n"); + } #endif #ifdef ANALYSIS - else if (strcmp(name, "analysis_scale_outputs_file")==0) - strncpy (parms->analysis_scale_outputs_file, value, MAXLEN); - else if (strcmp(name, "analysisdir")==0) - strncpy (parms->analysisdir, value, MAXLEN); - else if (strcmp(name, "lya_skewers_stride")==0) - parms->lya_skewers_stride = atoi(value); - else if (strcmp(name, "lya_Pk_d_log_k")==0) - parms->lya_Pk_d_log_k = atof(value); + } else if (strcmp(name, "analysis_scale_outputs_file") == 0) { + strncpy(parms->analysis_scale_outputs_file, value, MAXLEN); + } else if (strcmp(name, "analysisdir") == 0) { + strncpy(parms->analysisdir, value, MAXLEN); + } else if (strcmp(name, "lya_skewers_stride") == 0) { + parms->lya_skewers_stride = atoi(value); + } else if (strcmp(name, "lya_Pk_d_log_k") == 0) { + parms->lya_Pk_d_log_k = atof(value); #ifdef OUTPUT_SKEWERS - else if (strcmp(name, "skewersdir")==0) - strncpy (parms->skewersdir, value, MAXLEN); + } else if (strcmp(name, "skewersdir") == 0) { + strncpy(parms->skewersdir, value, MAXLEN); #endif #endif - else if (!is_param_valid(name)) - chprintf ("WARNING: %s/%s: Unknown parameter/value pair!\n", - name, value); +#ifdef SCALAR + #ifdef DUST + } else if (strcmp(name, "grain_radius") == 0) { + parms->grain_radius = atoi(value); + #endif +#endif + } else if (!Is_Param_Valid(name)) { + chprintf("WARNING: %s/%s: Unknown parameter/value pair!\n", name, value); + } } diff --git a/src/global/global.h b/src/global/global.h index 4e6d8eeb9..d2734131e 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -1,169 +1,154 @@ /*! /file global.h * /brief Declarations of global variables and functions. 
*/ - #ifndef GLOBAL_H #define GLOBAL_H -#ifdef COOLING_CPU -#include -#include -#endif +#include "../grid/grid_enum.h" // defines NSCALARS -#ifdef PARTICLES +#ifdef PARTICLES #include -#endif //PARTICLES +#endif // PARTICLES -#if PRECISION==1 -#ifndef TYPEDEF_DEFINED_REAL +#if PRECISION == 1 + #ifndef TYPEDEF_DEFINED_REAL typedef float Real; + #endif #endif -#endif -#if PRECISION==2 -#ifndef TYPEDEF_DEFINED_REAL +#if PRECISION == 2 + #ifndef TYPEDEF_DEFINED_REAL typedef double Real; -#endif + #endif #endif -#define MAXLEN 2048 +#define MAXLEN 2048 #define TINY_NUMBER 1.0e-20 -#define PI 3.141592653589793 -#define MP 1.672622e-24 // mass of proton, grams -#define KB 1.380658e-16 // boltzmann constant, cgs -//#define GN 6.67259e-8 // gravitational constant, cgs -#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 - -#define MYR 31.536e12 //Myears in secs -#define KPC 3.086e16 // kpc in km -#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 -#define MSUN_CGS 1.98847e33; //Msun in gr -#define KPC_CGS 3.086e21; //kpc in cm -#define KM_CGS 1e5; //km in cm -#define MH 1.67262171e-24 //Mass of hydrogen [g] - -#define TIME_UNIT 3.15569e10 // 1 kyr in s -#define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm -#define MASS_UNIT 1.98847e33 // 1 solar mass in grams -#define DENSITY_UNIT (MASS_UNIT/(LENGTH_UNIT*LENGTH_UNIT*LENGTH_UNIT)) -#define VELOCITY_UNIT (LENGTH_UNIT/TIME_UNIT) -#define ENERGY_UNIT (DENSITY_UNIT*VELOCITY_UNIT*VELOCITY_UNIT) -#define PRESSURE_UNIT (DENSITY_UNIT*VELOCITY_UNIT*VELOCITY_UNIT) -#define SP_ENERGY_UNIT (VELOCITY_UNIT*VELOCITY_UNIT) -#define MAGNETIC_FIELD_UNIT (sqrt(MASS_UNIT/LENGTH_UNIT) / TIME_UNIT) +#define MP 1.672622e-24 // mass of proton, grams +#define KB 1.380658e-16 // boltzmann constant, cgs +// #define GN 6.67259e-8 // gravitational constant, cgs +#define GN 4.49451e-18 // gravitational constant, kpc^3 / M_sun / kyr^2 +#define C_L 0.306594593 // speed of light in kpc/kyr + +#define MYR 
31.536e12 // Myears in secs +#define KPC 3.086e16 // kpc in km +#define G_COSMO 4.300927161e-06; // gravitational constant, kpc km^2 s^-2 Msun^-1 +#define MSUN_CGS 1.98847e33; // Msun in gr +#define KPC_CGS 3.086e21; // kpc in cm +#define KM_CGS 1e5; // km in cm +#define MH 1.67262171e-24 // Mass of hydrogen [g] + +#define TIME_UNIT 3.15569e10 // 1 kyr in s +#define LENGTH_UNIT 3.08567758e21 // 1 kpc in cm +#define MASS_UNIT 1.98847e33 // 1 solar mass in grams +#define DENSITY_UNIT (MASS_UNIT / (LENGTH_UNIT * LENGTH_UNIT * LENGTH_UNIT)) +#define VELOCITY_UNIT (LENGTH_UNIT / TIME_UNIT) +#define ENERGY_UNIT (DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT) +#define PRESSURE_UNIT (DENSITY_UNIT * VELOCITY_UNIT * VELOCITY_UNIT) +#define SP_ENERGY_UNIT (VELOCITY_UNIT * VELOCITY_UNIT) +#define MAGNETIC_FIELD_UNIT (sqrt(MASS_UNIT / LENGTH_UNIT) / TIME_UNIT) #define LOG_FILE_NAME "run_output.log" -//Conserved Floor Values -#define TEMP_FLOOR 1e-3 // in Kelvin -#define DENS_FLOOR 1e-5 // in code units +// Parameters for Enzo dual Energy Condition +// - Prior to GH PR #356, DE_ETA_1 nominally had a value of 0.001 in all +// simulations (in practice, the value of DE_ETA_1 had minimal significance +// in those simulations). In PR #356, we revised the internal-energy +// synchronization to account for the value of DE_ETA_1. This was necessary +// for non-cosmology simulations. +// - In Cosmological simulation, we set DE_ETA_1 to a large number (it doesn't +// really matter what, as long as its >=1) to maintain the older behavior +// - In the future, we run tests and revisit the choice of DE_ETA_1 in +// cosmological simulations +#ifdef COSMOLOGY + #define DE_ETA_1 10.0 +#else + #define DE_ETA_1 \ + 0.001 // Ratio of U to E for which Internal Energy is used to compute the + // Pressure. This also affects when the Internal Energy is used for + // the update. 
+#endif -//Parameter for Enzo dual Energy Condition -#define DE_ETA_1 0.001 //Ratio of U to E for which Internal Energy is used to compute the Pressure -#define DE_ETA_2 0.035 //Ratio of U to max(E_local) used to select which Internal Energy is used for the update. +#define DE_ETA_2 \ + 0.035 // Ratio of U to max(E_local) used to select which Internal Energy is + // used for the update. // Maximum time step for cosmological simulations -#define MAX_DELTA_A 0.001 +#define MAX_DELTA_A 0.001 #define MAX_EXPANSION_RATE 0.01 // Limit delta(a)/a -#ifdef COOLING_GRACKLE - #ifdef GRACKLE_METALS - #define NSCALARS 7 - #else - #define NSCALARS 6 - #endif // GRACKLE_METALS -#elif CHEMISTRY_GPU - #define NSCALARS 6 -#else -#ifdef SCALAR -// Set Number of scalar fields when not using grackle -#define NSCALARS 1 -#else -#define NSCALARS 0 -#endif//SCALAR -#endif//COOLING_GRACKLE - -#ifdef MHD +#ifdef MHD #define N_MHD_FIELDS 3 #else #define N_MHD_FIELDS 0 -#endif //MHD +#endif // MHD // Inital Chemistry fractions -#define INITIAL_FRACTION_HI 0.75984603480 -#define INITIAL_FRACTION_HII 1.53965115054e-4 -#define INITIAL_FRACTION_HEI 0.24000000008 -#define INITIAL_FRACTION_HEII 9.59999999903e-15 -#define INITIAL_FRACTION_HEIII 9.59999999903e-18 -#define INITIAL_FRACTION_ELECTRON 1.53965115054e-4 -#define INITIAL_FRACTION_METAL 1.00000000000e-10 - - -//Default Particles Compiler Flags +#define INITIAL_FRACTION_HI 0.75984603480 +#define INITIAL_FRACTION_HII 1.53965115054e-4 +#define INITIAL_FRACTION_HEI 0.24000000008 +#define INITIAL_FRACTION_HEII 9.59999999903e-15 +#define INITIAL_FRACTION_HEIII 9.59999999903e-18 +#define INITIAL_FRACTION_ELECTRON 1.53965115054e-4 +#define INITIAL_FRACTION_METAL 1.00000000000e-10 + +// Default Particles Compiler Flags #define PARTICLES_LONG_INTS #define PARTICLES_KDK - #ifdef GRAVITY -#ifdef GRAVITY_5_POINTS_GRADIENT -#ifdef PARTICLES -#define N_GHOST_POTENTIAL 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the CIC interpolation of 
the potential ) -#else -#define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient -#endif //PARTICLES - -#else -#ifdef PARTICLES -#define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the CIC interpolation of the potential ) -#else -#define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient -#endif //PARTICLES -#endif //GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef PARTICLES + #define N_GHOST_POTENTIAL \ + 3 // 3 ghost cells are needed for 5 point gradient, ( one is for the + // CIC interpolation of the potential ) + #else + #define N_GHOST_POTENTIAL 2 // 2 ghost cells are needed for 5 point gradient + #endif // PARTICLES + #else + #ifdef PARTICLES + #define N_GHOST_POTENTIAL \ + 2 // 2 ghost cells are needed for 3 point gradient, ( one is for the + // CIC interpolation of the potential ) + #else + #define N_GHOST_POTENTIAL 1 // 1 ghost cells are needed for 3 point gradient + #endif // PARTICLES + #endif // GRAVITY_5_POINTS_GRADIENT typedef long int grav_int_t; #endif #ifdef PARTICLES -#ifdef PARTICLES_LONG_INTS + #ifdef PARTICLES_LONG_INTS typedef long int part_int_t; -#else + #else typedef int part_int_t; -#endif//PARTICLES_LONG_INTS + #endif // PARTICLES_LONG_INTS -#include + #include typedef std::vector real_vector_t; typedef std::vector int_vector_t; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA // Constants for the inital size of the buffers for particles transfer // and the number of data transferred for each particle extern int N_PARTICLES_TRANSFER; extern int N_DATA_PER_PARTICLE_TRANSFER; -#endif//MPI_CHOLLA - -#ifdef AVERAGE_SLOW_CELLS -#define SLOW_FACTOR 10 -#endif//AVERAGE_SLOW_CELLS - -#endif//PARTICLES + #endif // MPI_CHOLLA + #ifdef AVERAGE_SLOW_CELLS + #define SLOW_FACTOR 10 + #endif // AVERAGE_SLOW_CELLS -#define SIGN(a) ( ((a) < 0.) ? -1. : 1. ) +#endif // PARTICLES +#define SIGN(a) (((a) < 0.) ? -1. : 1.) 
/* Global variables */ -extern Real gama; // Ratio of specific heats -extern Real C_cfl; // CFL number (0 - 0.5) +extern Real gama; // Ratio of specific heats +extern Real C_cfl; // CFL number (0 - 0.5) extern Real t_comm; extern Real t_other; -#ifdef COOLING_CPU -extern gsl_interp_accel *acc; -extern gsl_interp_accel *xacc; -extern gsl_interp_accel *yacc; -extern gsl_spline *highT_C_spline; -extern gsl_spline2d *lowT_C_spline; -extern gsl_spline2d *lowT_H_spline; -#endif #ifdef COOLING_GPU extern float *cooling_table; extern float *heating_table; @@ -173,23 +158,29 @@ extern float *heating_table; * \brief Set gamma values for Riemann solver. */ extern void Set_Gammas(Real gamma_in); -/*! \fn double get_time(void) +/*! \fn double Get_Time(void) * \brief Returns the current clock time. */ -extern double get_time(void); +extern double Get_Time(void); /*! \fn int sgn * \brief Mathematical sign function. Returns sign of x. */ -extern int sgn(Real x); - -#ifndef CUDA -/*! \fn Real calc_eta(Real cW[], Real gamma) - * \brief Calculate the eta value for the H correction. */ -extern Real calc_eta(Real cW[], Real gamma); -#endif - - -struct parameters -{ +extern int Sgn(Real x); + +/* Global variables for mpi (but they are also initialized to sensible defaults when not using mpi) + * + * It may make sense to move these back into mpi_routines (but reorganizing the ifdef statements + * would take some work). It may make sense to also put these into their own namespace. + */ +extern int procID; /*process rank*/ +extern int nproc; /*number of processes executing simulation*/ +extern int root; /*rank of root process*/ + +/* Used when MPI_CHOLLA is not defined to initialize a subset of the global mpi-related variables + * that still meaningful in non-mpi simulations. 
+ */ +void Init_Global_Parallel_Vars_No_MPI(); + +struct Parameters { int nx; int ny; int nz; @@ -199,24 +190,29 @@ struct parameters Real gamma; char init[MAXLEN]; int nfile; - int n_hydro; - int n_particle; - int n_projection; - int n_rotated_projection; - int n_slice; - int n_out_float32=0; - int out_float32_density=0; - int out_float32_momentum_x=0; - int out_float32_momentum_y=0; - int out_float32_momentum_z=0; - int out_float32_Energy=0; + int n_hydro = 1; + int n_particle = 1; + int n_projection = 1; + int n_rotated_projection = 1; + int n_slice = 1; + int n_out_float32 = 0; + int out_float32_density = 0; + int out_float32_momentum_x = 0; + int out_float32_momentum_y = 0; + int out_float32_momentum_z = 0; + int out_float32_Energy = 0; #ifdef DE - int out_float32_GasEnergy=0; + int out_float32_GasEnergy = 0; +#endif + bool output_always = false; + bool legacy_flat_outdir = false; +#ifdef STATIC_GRAV + int custom_grav = 0; // flag to set specific static gravity field #endif #ifdef MHD - int out_float32_magnetic_x=0; - int out_float32_magnetic_y=0; - int out_float32_magnetic_z=0; + int out_float32_magnetic_x = 0; + int out_float32_magnetic_y = 0; + int out_float32_magnetic_z = 0; #endif Real xmin; Real ymin; @@ -230,7 +226,7 @@ struct parameters int yu_bcnd; int zl_bcnd; int zu_bcnd; -#ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA int xlg_bcnd; int xug_bcnd; int ylg_bcnd; @@ -240,49 +236,67 @@ struct parameters #endif /*MPI_CHOLLA*/ char custom_bcnd[MAXLEN]; char outdir[MAXLEN]; - char indir[MAXLEN]; //Folder to load Initial conditions from - Real rho; - Real vx; - Real vy; - Real vz; - Real P; - Real A; - Real Bx; - Real By; - Real Bz; - Real rho_l; - Real vx_l; - Real vy_l=0; - Real vz_l=0; - Real P_l; - Real Bx_l; - Real By_l; - Real Bz_l; - Real rho_r; - Real vx_r; - Real vy_r=0; - Real vz_r=0; - Real P_r; - Real Bx_r; - Real By_r; - Real Bz_r; - Real diaph; + char indir[MAXLEN]; // Folder to load Initial conditions from + Real rho = 0; + Real vx = 0; + Real vy = 0; 
+ Real vz = 0; + Real P = 0; + Real A = 0; + Real Bx = 0; + Real By = 0; + Real Bz = 0; + Real rho_l = 0; + Real vx_l = 0; + Real vy_l = 0; + Real vz_l = 0; + Real P_l = 0; + Real Bx_l = 0; + Real By_l = 0; + Real Bz_l = 0; + Real rho_r = 0; + Real vx_r = 0; + Real vy_r = 0; + Real vz_r = 0; + Real P_r = 0; + Real Bx_r = 0; + Real By_r = 0; + Real Bz_r = 0; + Real diaph = 0; + Real rEigenVec_rho = 0; + Real rEigenVec_MomentumX = 0; + Real rEigenVec_MomentumY = 0; + Real rEigenVec_MomentumZ = 0; + Real rEigenVec_E = 0; + Real rEigenVec_Bx = 0; + Real rEigenVec_By = 0; + Real rEigenVec_Bz = 0; + Real pitch = 0; + Real yaw = 0; + Real polarization = 0; + Real radius = 0; + Real P_blast = 0; + Real wave_length = 1.0; #ifdef PARTICLES // The random seed for particle simulations. With the default of 0 then a // machine dependent seed will be generated. std::uint_fast64_t prng_seed = 0; -#endif // PARTICLES +#endif // PARTICLES +#ifdef SUPERNOVA + char snr_filename[MAXLEN]; +#endif #ifdef ROTATED_PROJECTION + // initialize rotation parameters to zero int nxr; int nzr; - Real delta; - Real theta; - Real phi; + Real delta = 0; + Real theta = 0; + Real phi = 0; Real Lx; Real Lz; - int n_delta; - Real ddelta_dt; - int flag_delta; + int n_delta = 0; + Real ddelta_dt = 0; + int flag_delta = 0; #endif /*ROTATED_PROJECTION*/ #ifdef COSMOLOGY Real H0; @@ -291,11 +305,12 @@ struct parameters Real Omega_b; Real Init_redshift; Real End_redshift; - char scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations -#endif //COSMOLOGY + char scale_outputs_file[MAXLEN]; // File for the scale_factor output values + // for cosmological simulations +#endif // COSMOLOGY #ifdef TILED_INITIAL_CONDITIONS Real tile_length; -#endif //TILED_INITIAL_CONDITIONS +#endif // TILED_INITIAL_CONDITIONS #ifdef SET_MPI_GRID // Set the MPI Processes grid [n_proc_x, n_proc_y, n_proc_z] @@ -304,11 +319,17 @@ struct parameters int n_proc_z; #endif int bc_potential_type; 
-#if defined(COOLING_GRACKLE) || defined (CHEMISTRY_GPU) - char UVB_rates_file[MAXLEN]; //File for the UVB photoheating and photoionization rates of HI, HeI and HeII -#endif +#if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) + char UVB_rates_file[MAXLEN]; // File for the UVB photoheating and + // photoionization rates of HI, HeI and HeII +#endif + Real temperature_floor = 0; + Real density_floor = 0; + Real scalar_floor = 0; #ifdef ANALYSIS - char analysis_scale_outputs_file[MAXLEN]; //File for the scale_factor output values for cosmological simulations {{}} + char analysis_scale_outputs_file[MAXLEN]; // File for the scale_factor output + // values for cosmological + // simulations {{}} char analysisdir[MAXLEN]; int lya_skewers_stride; Real lya_Pk_d_log_k; @@ -316,15 +337,20 @@ struct parameters char skewersdir[MAXLEN]; #endif #endif +#ifdef SCALAR + #ifdef DUST + Real grain_radius; + #endif +#endif }; - -/*! \fn void parse_params(char *param_file, struct parameters * parms); +/*! \fn void parse_params(char *param_file, struct Parameters * parms); * \brief Reads the parameters in the given file into a structure. */ -extern void parse_params (char *param_file, struct parameters * parms, int argc, char** argv); +extern void Parse_Params(char *param_file, struct Parameters *parms, int argc, char **argv); /*! \fn int is_param_valid(char *name); - * \brief Verifies that a param is valid (even if not needed). Avoids "warnings" in output. */ -extern int is_param_valid(const char *name); + * \brief Verifies that a param is valid (even if not needed). Avoids + * "warnings" in output. */ +extern int Is_Param_Valid(const char *name); -#endif //GLOBAL_H +#endif // GLOBAL_H diff --git a/src/global/global_cuda.cu b/src/global/global_cuda.cu index bd2e235c1..17c515416 100644 --- a/src/global/global_cuda.cu +++ b/src/global/global_cuda.cu @@ -1,24 +1,16 @@ /*! \file global_cuda.cu * \brief Declarations of the cuda global variables. 
*/ -#ifdef CUDA - #include "../global/global.h" // Declare global variables bool memory_allocated; Real *dev_conserved, *dev_conserved_half; Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; +Real *ctElectricFields; Real *eta_x, *eta_y, *eta_z, *etah_x, *etah_y, *etah_z; -Real *dev_dti; -//Arrays for potential in GPU: Will be set to NULL if not using GRAVITY +// Arrays for potential in GPU: Will be set to NULL if not using GRAVITY Real *dev_grav_potential; Real *temp_potential; Real *buffer_potential; - -// Arrays for calc_dt -Real *host_dti_array; -Real *dev_dti_array; - -#endif //CUDA diff --git a/src/global/global_cuda.h b/src/global/global_cuda.h index 35c0c355f..3f4d3148e 100644 --- a/src/global/global_cuda.h +++ b/src/global/global_cuda.h @@ -1,23 +1,22 @@ /*! /file global_cuda.h - * /brief Declarations of global variables and functions for the cuda kernels. */ - -#ifdef CUDA - -#include -#include -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" - + * /brief Declarations of global variables and functions for the cuda kernels. + */ #ifndef GLOBAL_CUDA_H #define GLOBAL_CUDA_H -#define TPB 256 // threads per block -//#define TPB 64 +#include +#include +#include + +#include "../global/global.h" +#include "../utils/gpu.hpp" +#define TPB 256 // threads per block +// #define TPB 64 -extern bool memory_allocated; // Flag becomes true after allocating the memory on the first timestep +extern bool memory_allocated; // Flag becomes true after allocating the memory + // on the first timestep // Arrays are global so that they can be allocated only once. 
// Not all arrays will be allocated for every integrator @@ -25,81 +24,46 @@ extern bool memory_allocated; // Flag becomes true after allocating the memory o // conserved variables extern Real *dev_conserved, *dev_conserved_half; // input states and associated interface fluxes (Q* and F* from Stone, 2008) +// Note that for hydro the size of these arrays is n_fields*n_cells*sizeof(Real) +// while for MHD it is (n_fields-1)*n_cells*sizeof(Real), i.e. they has one +// fewer field than you would expect extern Real *Q_Lx, *Q_Rx, *Q_Ly, *Q_Ry, *Q_Lz, *Q_Rz, *F_x, *F_y, *F_z; +// Constrained transport electric fields +extern Real *ctElectricFields; -// Scalar for storing device side hydro/MHD time steps -extern Real *dev_dti; - -// array of inverse timesteps for dt calculation (brought back by Alwin May 24 2022) -extern Real *host_dti_array; -extern Real *dev_dti_array; - -//Arrays for potential in GPU: Will be set to NULL if not using GRAVITY +// Arrays for potential in GPU: Will be set to NULL if not using GRAVITY extern Real *dev_grav_potential; extern Real *temp_potential; extern Real *buffer_potential; -#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ ) -#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ ) - -inline void __cudaSafeCall( cudaError err, const char *file, const int line ) -{ -#ifdef CUDA_ERROR_CHECK - if ( cudaSuccess != err ) - { - fprintf( stderr, "cudaSafeCall() failed at %s:%i : %s\n", - file, line, cudaGetErrorString( err ) ); - exit( -1 ); - } -#endif - - return; -} - -inline void __cudaCheckError( const char *file, const int line ) -{ -#ifdef CUDA_ERROR_CHECK - cudaError err = cudaGetLastError(); - if ( cudaSuccess != err ) - { - fprintf( stderr, "cudaCheckError() failed at %s:%i : %s\n", - file, line, cudaGetErrorString( err ) ); - exit( -1 ); - } - - // More careful checking. However, this will affect performance. - // Comment away if needed. 
- err = cudaDeviceSynchronize(); - if( cudaSuccess != err ) - { - fprintf( stderr, "cudaCheckError() with sync failed at %s:%i : %s\n", - file, line, cudaGetErrorString( err ) ); - exit( -1 ); - } -#endif - - return; -} - - -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) -{ - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } -} - /*! \fn int sgn_CUDA * \brief Mathematical sign function. Returns sign of x. */ __device__ inline int sgn_CUDA(Real x) { - if (x < 0) return -1; - else return 1; + if (x < 0) { + return -1; + } else { + return 1; + } } -#endif //GLOBAL_CUDA_H +// Define atomic_add if it's not supported +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double *address, double val) +{ + unsigned long long int *address_as_ull = (unsigned long long int *)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + +// This helper function exists to make it easier to find printfs inside +// kernels +#define kernel_printf printf -#endif //CUDA +#endif // GLOBAL_CUDA_H diff --git a/src/gravity/grav3D.cpp b/src/gravity/grav3D.cpp index f07ebade0..866663589 100644 --- a/src/gravity/grav3D.cpp +++ b/src/gravity/grav3D.cpp @@ -1,85 +1,84 @@ #ifdef GRAVITY -#include -#include -#include -#include -#include "../global/global.h" -#include "../io/io.h" + #include "../gravity/grav3D.h" -#include "../gravity/grav3D.h" - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include + #include + #include + #include + #include "../global/global.h" + #include "../io/io.h" + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + 
#endif -Grav3D::Grav3D( void ){} +Grav3D::Grav3D(void) {} -void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P ) +void Grav3D::Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, + Real Lz, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, + Real dy_real, Real dz_real, int n_ghost_pot_offset, struct Parameters *P) { - - //Set Box Size + // Set Box Size Lbox_x = Lx; Lbox_y = Ly; Lbox_z = Lz; - //Set Box Left boundary positions + // Set Box Left boundary positions xMin = x_min; yMin = y_min; zMin = z_min; - //Set Box Right boundary positions + // Set Box Right boundary positions xMax = x_max; yMax = y_max; zMax = z_max; - - - //Set uniform ( dx, dy, dz ) + // Set uniform ( dx, dy, dz ) dx = dx_real; dy = dy_real; dz = dz_real; - //Set Box Total number of cells + // Set Box Total number of cells nx_total = nx; ny_total = ny; nz_total = nz; - //Set Box local domain number of cells + // Set Box local domain number of cells nx_local = nx_real; ny_local = ny_real; nz_local = nz_real; - //Local n_cells without ghost cells - n_cells = nx_local*ny_local*nz_local; - //Local n_cells including ghost cells for the potential array - n_cells_potential = ( nx_local + 2*N_GHOST_POTENTIAL ) * ( ny_local + 2*N_GHOST_POTENTIAL ) * ( nz_local + 2*N_GHOST_POTENTIAL ); + // Local n_cells without ghost cells + n_cells = nx_local * ny_local * nz_local; + // Local n_cells including ghost cells for the potential array + n_cells_potential = + (nx_local + 2 * N_GHOST_POTENTIAL) * (ny_local + 2 * N_GHOST_POTENTIAL) * (nz_local + 2 * N_GHOST_POTENTIAL); - //Set Initial and dt used for the extrapolation of the potential; - //The first timestep the potential in not extrapolated ( INITIAL = TRUE ) + // Set 
Initial and dt used for the extrapolation of the potential; + // The first timestep the potential is not extrapolated ( INITIAL = TRUE ) INITIAL = true; dt_prev = 0; - dt_now = 0; + dt_now = 0; #ifdef COSMOLOGY - //Set the scale factor for cosmological simulations to 1, - //This will be changed to the proper value when cosmology is initialized + // Set the scale factor for cosmological simulations to 1, + // This will be changed to the proper value when cosmology is initialized current_a = 1; #endif - //Set the average density=0 ( Not Used ) + // Set the average density=0 ( Not Used ) dens_avrg = 0; - //Set the Gravitational Constant ( units must be consistent ) + // Set the Gravitational Constant ( units must be consistent ) Gconst = GN; - if (strcmp(P->init, "Spherical_Overdensity_3D")==0){ + if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) { Gconst = 1; chprintf(" WARNING: Using Gravitational Constant G=1.\n"); } - //Flag to transfer the Potential boundaries + // Flag to transfer the Potential boundaries TRANSFER_POTENTIAL_BOUNDARIES = false; // Flag to set the gravity boundary flags @@ -93,12 +92,14 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_ Initialize_values_CPU(); - chprintf( "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n Global: %d %d %d \n", - Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, nz_total ); + chprintf( + "Gravity Initialized: \n Lbox: %0.2f %0.2f %0.2f \n Local: %d %d %d \n " + "Global: %d %d %d \n", + Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, nx_total, ny_total, nz_total); - chprintf( " dx:%f dy:%f dz:%f\n", dx, dy, dz ); - chprintf( " N ghost potential: %d\n", N_GHOST_POTENTIAL); - chprintf( " N ghost offset: %d\n", n_ghost_pot_offset); + chprintf(" dx:%f dy:%f dz:%f\n", dx, dy, dz); + chprintf(" N ghost potential: %d\n", N_GHOST_POTENTIAL); + chprintf(" N ghost offset: %d\n", n_ghost_pot_offset); #ifdef PARALLEL_OMP chprintf(" Using OMP
for gravity calculations\n"); @@ -107,47 +108,67 @@ void Grav3D::Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_ chprintf(" N OMP Threads per MPI process: %d\n", N_OMP_THREADS); #endif - Poisson_solver.Initialize( Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz ); + Poisson_solver.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, + nz_local, dx, dy, dz); #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) - Poisson_solver_test.Initialize( Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, ny_local, nz_local, dx, dy, dz ); + Poisson_solver_test.Initialize(Lbox_x, Lbox_y, Lbox_z, xMin, yMin, zMin, nx_total, ny_total, nz_total, nx_local, + ny_local, nz_local, dx, dy, dz); #endif + + // At the end of initializing, set restart state if needed + + if ((strcmp(P->init, "Read_Grid") == 0) && (P->nfile > 0)) { + Read_Restart_HDF5(P, P->nfile); + } } void Grav3D::AllocateMemory_CPU(void) { // allocate memory for the density and potential arrays - F.density_h = (Real *) malloc(n_cells*sizeof(Real)); //array for the density - F.potential_h = (Real *) malloc(n_cells_potential*sizeof(Real)); //array for the potential at the n-th timestep - F.potential_1_h = (Real *) malloc(n_cells_potential*sizeof(Real)); //array for the potential at the (n-1)-th timestep - boundary_flags = (int *) malloc(6*sizeof(int)); // array for the gravity boundary flags + F.density_h = (Real *)malloc(n_cells * sizeof(Real)); // array for the + // density + F.potential_h = (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the n-th timestep + F.potential_1_h = + (Real *)malloc(n_cells_potential * sizeof(Real)); // array for the potential at the (n-1)-th timestep + boundary_flags = (int *)malloc(6 * sizeof(int)); // array for the gravity boundary flags #ifdef GRAV_ISOLATED_BOUNDARY_X - F.pot_boundary_x0 = 
(Real *) malloc(N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)); //array for the potential isolated boundary - F.pot_boundary_x1 = (Real *) malloc(N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)); + F.pot_boundary_x0 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_x1 = (Real *)malloc(N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real)); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - F.pot_boundary_y0 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)); //array for the potential isolated boundary - F.pot_boundary_y1 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)); + F.pot_boundary_y0 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_y1 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real)); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - F.pot_boundary_z0 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); //array for the potential isolated boundary - F.pot_boundary_z1 = (Real *) malloc(N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)); + F.pot_boundary_z0 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * + sizeof(Real)); // array for the potential isolated boundary + F.pot_boundary_z1 = (Real *)malloc(N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real)); #endif -} -void Grav3D::Set_Boundary_Flags( int *flags ){ - for (int i=0; i<6; i++) boundary_flags[i] = flags[i]; + #ifdef GRAVITY_ANALYTIC_COMP + F.analytic_potential_h = (Real *)malloc(n_cells_potential * sizeof(Real)); + #endif } -void Grav3D::Initialize_values_CPU(void){ +void Grav3D::Set_Boundary_Flags(int *flags) +{ + for (int i = 0; i < 6; i++) { + boundary_flags[i] = flags[i]; + } +} - //Set initial values to 0. 
- for (int id=0; id + #include "../global/global.h" #ifdef SOR -#include "../gravity/potential_SOR_3D.h" + #include "../gravity/potential_SOR_3D.h" #endif #ifdef PARIS -#include "../gravity/potential_paris_3D.h" + #include "../gravity/potential_paris_3D.h" #endif #ifdef PARIS_GALACTIC -#include "../gravity/potential_paris_galactic.h" + #include "../gravity/potential_paris_galactic.h" #endif #ifdef HDF5 -#include + #include #endif #define GRAV_ISOLATED_BOUNDARY_X #define GRAV_ISOLATED_BOUNDARY_Y #define GRAV_ISOLATED_BOUNDARY_Z -#define TPB_GRAV 1024 +#define TPB_GRAV 1024 #define TPBX_GRAV 16 #define TPBY_GRAV 8 #define TPBZ_GRAV 8 @@ -33,8 +34,7 @@ * \brief Class to create a the gravity object. */ class Grav3D { - public: - + public: Real Lbox_x; Real Lbox_y; Real Lbox_z; @@ -46,46 +46,44 @@ class Grav3D Real yMax; Real zMax; /*! \var nx - * \brief Total number of cells in the x-dimension */ + * \brief Total number of cells in the x-dimension */ int nx_total; /*! \var ny - * \brief Total number of cells in the y-dimension */ + * \brief Total number of cells in the y-dimension */ int ny_total; /*! \var nz - * \brief Total number of cells in the z-dimension */ + * \brief Total number of cells in the z-dimension */ int nz_total; /*! \var nx_local - * \brief Local number of cells in the x-dimension */ + * \brief Local number of cells in the x-dimension */ int nx_local; /*! \var ny_local - * \brief Local number of cells in the y-dimension */ + * \brief Local number of cells in the y-dimension */ int ny_local; /*! \var nz_local - * \brief Local number of cells in the z-dimension */ + * \brief Local number of cells in the z-dimension */ int nz_local; /*! \var dx - * \brief x-width of cells */ + * \brief x-width of cells */ Real dx; /*! \var dy - * \brief y-width of cells */ + * \brief y-width of cells */ Real dy; /*! 
\var dz - * \brief z-width of cells */ + * \brief z-width of cells */ Real dz; - #ifdef COSMOLOGY +#ifdef COSMOLOGY Real current_a; - #endif - - Real dens_avrg ; +#endif + Real dens_avrg; int n_cells; int n_cells_potential; - bool INITIAL; Real dt_prev; @@ -95,117 +93,129 @@ class Grav3D bool TRANSFER_POTENTIAL_BOUNDARIES; - bool BC_FLAGS_SET; int *boundary_flags; - - #ifdef SOR +#ifdef SOR Potential_SOR_3D Poisson_solver; - #endif +#endif - #ifdef PARIS - Potential_Paris_3D Poisson_solver; - #endif +#ifdef PARIS + PotentialParis3D Poisson_solver; +#endif - #ifdef PARIS_GALACTIC +#ifdef PARIS_GALACTIC #ifdef SOR - #define PARIS_GALACTIC_TEST - Potential_Paris_Galactic Poisson_solver_test; + #define PARIS_GALACTIC_TEST + PotentialParisGalactic Poisson_solver_test; #else - Potential_Paris_Galactic Poisson_solver; - #endif + PotentialParisGalactic Poisson_solver; #endif +#endif - struct Fields - { + struct Fields { /*! \var density_h * \brief Array containing the density of each cell in the grid */ Real *density_h; /*! \var potential_h - * \brief Array containing the gravitational potential of each cell in the grid */ + * \brief Array containing the gravitational potential of each cell in the + * grid */ Real *potential_h; /*! \var potential_h - * \brief Array containing the gravitational potential of each cell in the grid at the previous time step */ + * \brief Array containing the gravitational potential of each cell in the + * grid at the previous time step */ Real *potential_1_h; - #ifdef GRAVITY_GPU +#ifdef GRAVITY_ANALYTIC_COMP + Real *analytic_potential_h; +#endif + +#ifdef GRAVITY_GPU /*! \var density_d * \brief Device Array containing the density of each cell in the grid */ Real *density_d; /*! \var potential_d - * \brief Device Array containing the gravitational potential of each cell in the grid */ + * \brief Device Array containing the gravitational potential of each cell + * in the grid */ Real *potential_d; /*! 
\var potential_d - * \brief Device Array containing the gravitational potential of each cell in the grid at the previous time step */ + * \brief Device Array containing the gravitational potential of each cell + * in the grid at the previous time step */ Real *potential_1_d; - #endif //GRAVITY_GPU + #ifdef GRAVITY_ANALYTIC_COMP + Real *analytic_potential_d; + #endif + +#endif // GRAVITY_GPU - // Arrays for computing the potential values in isolated boundaries - #ifdef GRAV_ISOLATED_BOUNDARY_X +// Arrays for computing the potential values in isolated boundaries +#ifdef GRAV_ISOLATED_BOUNDARY_X Real *pot_boundary_x0; Real *pot_boundary_x1; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y +#endif +#ifdef GRAV_ISOLATED_BOUNDARY_Y Real *pot_boundary_y0; Real *pot_boundary_y1; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z +#endif +#ifdef GRAV_ISOLATED_BOUNDARY_Z Real *pot_boundary_z0; Real *pot_boundary_z1; - #endif +#endif - #ifdef GRAVITY_GPU - #ifdef GRAV_ISOLATED_BOUNDARY_X +#ifdef GRAVITY_GPU + #ifdef GRAV_ISOLATED_BOUNDARY_X Real *pot_boundary_x0_d; Real *pot_boundary_x1_d; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y Real *pot_boundary_y0_d; Real *pot_boundary_y1_d; - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z Real *pot_boundary_z0_d; Real *pot_boundary_z1_d; - #endif - #endif//GRAVITY_GPU + #endif +#endif // GRAVITY_GPU } F; /*! \fn Grav3D(void) - * \brief Constructor for the gravity class */ + * \brief Constructor for the gravity class */ Grav3D(void); /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) - * \brief Initialize the grid. */ - void Initialize( Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real, int n_ghost_pot_offset, struct parameters *P); + * \brief Initialize the grid. 
*/ + void Initialize(Real x_min, Real y_min, Real z_min, Real x_max, Real y_max, Real z_max, Real Lx, Real Ly, Real Lz, + int nx_total, int ny_total, int nz_total, int nx_real, int ny_real, int nz_real, Real dx_real, + Real dy_real, Real dz_real, int n_ghost_pot_offset, struct Parameters *P); void AllocateMemory_CPU(void); void Initialize_values_CPU(); void FreeMemory_CPU(void); - Real Get_Average_Density( ); - Real Get_Average_Density_function( int g_start, int g_end ); + void Read_Restart_HDF5(struct Parameters *P, int nfile); + void Write_Restart_HDF5(struct Parameters *P, int nfile); - void Set_Boundary_Flags( int *flags ); + Real Get_Average_Density(); + Real Get_Average_Density_function(int g_start, int g_end); - #ifdef SOR - void Copy_Isolated_Boundary_To_GPU_buffer( Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size ); - void Copy_Isolated_Boundaries_To_GPU( struct parameters *P ); - #endif + void Set_Boundary_Flags(int *flags); + +#ifdef SOR + void Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size); + void Copy_Isolated_Boundaries_To_GPU(struct Parameters *P); +#endif - #ifdef GRAVITY_GPU +#ifdef GRAVITY_GPU void AllocateMemory_GPU(void); void FreeMemory_GPU(void); - #endif - +#endif }; - -#endif //GRAV3D_H +#endif // GRAV3D_H diff --git a/src/gravity/gravity_boundaries.cpp b/src/gravity/gravity_boundaries.cpp index 8c813af81..5e4b101eb 100644 --- a/src/gravity/gravity_boundaries.cpp +++ b/src/gravity/gravity_boundaries.cpp @@ -1,98 +1,130 @@ #ifdef GRAVITY + #include -#include -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../gravity/grav3D.h" -#include "../model/disk_galaxy.h" + #include "../gravity/grav3D.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../model/disk_galaxy.h" -#if defined (GRAV_ISOLATED_BOUNDARY_X) || defined (GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) - -void 
Grid3D::Compute_Potential_Boundaries_Isolated( int dir, struct parameters *P ){ + #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) +void Grid3D::Compute_Potential_Boundaries_Isolated(int dir, struct Parameters *P) +{ // Set Isolated Boundaries for the ghost cells. int bc_potential_type = P->bc_potential_type; - //bc_potential_type = 0 -> Point mass potential GM/r - if ( dir == 0 ) Compute_Potential_Isolated_Boundary( 0, 0, bc_potential_type ); - if ( dir == 1 ) Compute_Potential_Isolated_Boundary( 0, 1, bc_potential_type ); - if ( dir == 2 ) Compute_Potential_Isolated_Boundary( 1, 0, bc_potential_type ); - if ( dir == 3 ) Compute_Potential_Isolated_Boundary( 1, 1, bc_potential_type ); - if ( dir == 4 ) Compute_Potential_Isolated_Boundary( 2, 0, bc_potential_type ); - if ( dir == 5 ) Compute_Potential_Isolated_Boundary( 2, 1, bc_potential_type ); - + // bc_potential_type = 0 -> Point mass potential GM/r + if (dir == 0) { + Compute_Potential_Isolated_Boundary(0, 0, bc_potential_type); + } + if (dir == 1) { + Compute_Potential_Isolated_Boundary(0, 1, bc_potential_type); + } + if (dir == 2) { + Compute_Potential_Isolated_Boundary(1, 0, bc_potential_type); + } + if (dir == 3) { + Compute_Potential_Isolated_Boundary(1, 1, bc_potential_type); + } + if (dir == 4) { + Compute_Potential_Isolated_Boundary(2, 0, bc_potential_type); + } + if (dir == 5) { + Compute_Potential_Isolated_Boundary(2, 1, bc_potential_type); + } } -void Grid3D::Set_Potential_Boundaries_Isolated( int direction, int side, int *flags ){ - +void Grid3D::Set_Potential_Boundaries_Isolated(int direction, int side, int *flags) +{ Real *pot_boundary; int n_i, n_j, nGHST; int nx_g, ny_g, nz_g; int nx_local, ny_local, nz_local; - nGHST = N_GHOST_POTENTIAL; - nx_g = Grav.nx_local + 2*nGHST; - ny_g = Grav.ny_local + 2*nGHST; - nz_g = Grav.nz_local + 2*nGHST; + nGHST = N_GHOST_POTENTIAL; + nx_g = Grav.nx_local + 2 * nGHST; + ny_g = Grav.ny_local + 2 * 
nGHST; + nz_g = Grav.nz_local + 2 * nGHST; nx_local = Grav.nx_local; ny_local = Grav.ny_local; nz_local = Grav.nz_local; - #ifdef GRAV_ISOLATED_BOUNDARY_X - if ( direction == 0 ){ + #ifdef GRAV_ISOLATED_BOUNDARY_X + if (direction == 0) { n_i = Grav.ny_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_x0; - if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_x1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_x0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_x1; + } } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - if ( direction == 1 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + if (direction == 1) { n_i = Grav.nx_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_y0; - if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_y1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_y0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_y1; + } } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - if ( direction == 2 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + if (direction == 2) { n_i = Grav.nx_local; n_j = Grav.ny_local; - if ( side == 0 ) pot_boundary = Grav.F.pot_boundary_z0; - if ( side == 1 ) pot_boundary = Grav.F.pot_boundary_z1; + if (side == 0) { + pot_boundary = Grav.F.pot_boundary_z0; + } + if (side == 1) { + pot_boundary = Grav.F.pot_boundary_z1; + } } - #endif + #endif int i, j, k, id_buffer, id_grid; - for ( k=0; k -#include -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../gravity/grav3D.h" + #include "../gravity/grav3D.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #if defined(GRAV_ISOLATED_BOUNDARY_X) || defined(GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) -#if defined (GRAV_ISOLATED_BOUNDARY_X) || defined (GRAV_ISOLATED_BOUNDARY_Y) || defined(GRAV_ISOLATED_BOUNDARY_Z) - -void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int 
n_ghost, Real *potential_d, Real *pot_boundary_d ){ - +void __global__ Set_Potential_Boundaries_Isolated_kernel(int direction, int side, int size_buffer, int n_i, int n_j, + int nx, int ny, int nz, int n_ghost, Real *potential_d, + Real *pot_boundary_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( tid_k ) + (tid_i+n_ghost)*nx + (tid_j+n_ghost)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost + tid_k ) + (tid_i+n_ghost)*nx + (tid_j+n_ghost)*nx*ny; + if (direction == 0) { + if (side == 0) { + tid_pot = (tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + } + if (side == 1) { + tid_pot = (nx - n_ghost + tid_k) + (tid_i + n_ghost) * nx + (tid_j + n_ghost) * nx * ny; + } } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + ( tid_k )*nx + (tid_j+n_ghost)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + ( ny - n_ghost + tid_k )*nx + (tid_j+n_ghost)*nx*ny; + if (direction == 1) { + if (side == 0) { + tid_pot = (tid_i + n_ghost) + (tid_k)*nx + (tid_j + n_ghost) * nx * ny; + } + if (side == 1) { + tid_pot = (tid_i + n_ghost) + (ny - n_ghost + tid_k) * nx + (tid_j + n_ghost) * nx * ny; + } } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx + ( tid_k )*nx*ny; - if ( side 
== 1 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx + ( nz - n_ghost + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) { + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx + (nz - n_ghost + tid_k) * nx * ny; + } } potential_d[tid_pot] = pot_boundary_d[tid_buffer]; } -void Grid3D::Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int *flags ){ - +void Grid3D::Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int *flags) +{ int n_i, n_j, n_ghost, size_buffer; int nx_g, ny_g, nz_g; n_ghost = N_GHOST_POTENTIAL; - nx_g = Grav.nx_local + 2*n_ghost; - ny_g = Grav.ny_local + 2*n_ghost; - nz_g = Grav.nz_local + 2*n_ghost; - + nx_g = Grav.nx_local + 2 * n_ghost; + ny_g = Grav.ny_local + 2 * n_ghost; + nz_g = Grav.nz_local + 2 * n_ghost; Real *pot_boundary_h, *pot_boundary_d; - #ifdef GRAV_ISOLATED_BOUNDARY_X - if ( direction == 0 ){ + #ifdef GRAV_ISOLATED_BOUNDARY_X + if (direction == 0) { n_i = Grav.ny_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_x0; - if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_x1; - if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_x0_d; - if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_x1_d; + if (side == 0) { + pot_boundary_h = Grav.F.pot_boundary_x0; + } + if (side == 1) { + pot_boundary_h = Grav.F.pot_boundary_x1; + } + if (side == 0) { + pot_boundary_d = Grav.F.pot_boundary_x0_d; + } + if (side == 1) { + pot_boundary_d = Grav.F.pot_boundary_x1_d; + } } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - if ( direction == 1 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + if (direction == 1) { n_i = Grav.nx_local; n_j = Grav.nz_local; - if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_y0; - if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_y1; - if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_y0_d; - if ( side == 1 ) pot_boundary_d = 
Grav.F.pot_boundary_y1_d; + if (side == 0) { + pot_boundary_h = Grav.F.pot_boundary_y0; + } + if (side == 1) { + pot_boundary_h = Grav.F.pot_boundary_y1; + } + if (side == 0) { + pot_boundary_d = Grav.F.pot_boundary_y0_d; + } + if (side == 1) { + pot_boundary_d = Grav.F.pot_boundary_y1_d; + } } - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - if ( direction == 2 ){ + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + if (direction == 2) { n_i = Grav.nx_local; n_j = Grav.ny_local; - if ( side == 0 ) pot_boundary_h = Grav.F.pot_boundary_z0; - if ( side == 1 ) pot_boundary_h = Grav.F.pot_boundary_z1; - if ( side == 0 ) pot_boundary_d = Grav.F.pot_boundary_z0_d; - if ( side == 1 ) pot_boundary_d = Grav.F.pot_boundary_z1_d; + if (side == 0) { + pot_boundary_h = Grav.F.pot_boundary_z0; + } + if (side == 1) { + pot_boundary_h = Grav.F.pot_boundary_z1; + } + if (side == 0) { + pot_boundary_d = Grav.F.pot_boundary_z0_d; + } + if (side == 1) { + pot_boundary_d = Grav.F.pot_boundary_z1_d; + } } - #endif + #endif size_buffer = N_GHOST_POTENTIAL * n_i * n_j; // set values for GPU kernels - int ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1; + int ngrid = (size_buffer - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_GRAV, 1, 1); - //Copy the boundary array from host to device - cudaMemcpy( pot_boundary_d, pot_boundary_h, size_buffer*sizeof(Real), cudaMemcpyHostToDevice ); + // Copy the boundary array from host to device + cudaMemcpy(pot_boundary_d, pot_boundary_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice); cudaDeviceSynchronize(); // Copy the potential boundary from buffer to potential array - hipLaunchKernelGGL( Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d ); - + hipLaunchKernelGGL(Set_Potential_Boundaries_Isolated_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + 
size_buffer, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d, pot_boundary_d); } + #endif // GRAV_ISOLATED_BOUNDARY - - -#endif //GRAV_ISOLATED_BOUNDARY - - -void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *potential_d ){ - +void __global__ Set_Potential_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, int ny, + int nz, int n_ghost, Real *potential_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_src, tid_dst; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - if ( direction == 0 ){ - if ( side == 0 ) tid_src = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = ( tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = ( n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = ( nx - n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; } - if ( direction == 1 ){ - if ( side == 0 ) tid_src = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + ( tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + ( n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + ( ny - n_ghost + tid_k )*nx + (tid_j)*nx*ny; + + if (direction == 0) { + if (side == 0) { + tid_src = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 
1) { + tid_src = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } - if ( direction == 2 ){ - if ( side == 0 ) tid_src = (tid_i) + (tid_j)*nx + ( nz - 2*n_ghost + tid_k )*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k )*nx*ny; + if (direction == 1) { + if (side == 0) { + tid_src = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } - + if (direction == 2) { + if (side == 0) { + tid_src = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + } + } + potential_d[tid_dst] = potential_d[tid_src]; - } - -void Grid3D::Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int *flags ){ - +void Grid3D::Set_Potential_Boundaries_Periodic_GPU(int direction, int side, int *flags) +{ int n_i, n_j, n_ghost, size; int nx_g, ny_g, nz_g; n_ghost = N_GHOST_POTENTIAL; - nx_g = Grav.nx_local + 2*n_ghost; - ny_g = Grav.ny_local + 2*n_ghost; - nz_g = Grav.nz_local + 2*n_ghost; + nx_g = Grav.nx_local + 2 * n_ghost; + ny_g = Grav.ny_local + 2 * n_ghost; + nz_g = Grav.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) 
{ n_i = nx_g; n_j = ny_g; } @@ -165,66 +223,81 @@ void Grid3D::Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int size = N_GHOST_POTENTIAL * n_i * n_j; // set values for GPU kernels - int ngrid = ( size - 1 ) / TPB_GRAV + 1; + int ngrid = (size - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_GRAV, 1, 1); // Copy the potential boundary from buffer to potential array - hipLaunchKernelGGL( Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d ); - - + hipLaunchKernelGGL(Set_Potential_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, + nx_g, ny_g, nz_g, n_ghost, Grav.F.potential_d); } -__global__ void Load_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Load_Transfer_Buffer_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) { + return; + } - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + 
tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if (side == 0) { + tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (nx - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) { + tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + } } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) { + tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + } } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; - } -int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ){ - +int Grid3D::Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start) +{ // printf( "Loading Gravity Buffer: Dir %d side: %d \n", direction, side ); - int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;; + int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid; + ; 
n_ghost_potential = N_GHOST_POTENTIAL; n_ghost_transfer = N_GHOST_POTENTIAL; - nx_pot = Grav.nx_local + 2*n_ghost_potential; - ny_pot = Grav.ny_local + 2*n_ghost_potential; - nz_pot = Grav.nz_local + 2*n_ghost_potential; + nx_pot = Grav.nx_local + 2 * n_ghost_potential; + ny_pot = Grav.ny_local + 2 * n_ghost_potential; + nz_pot = Grav.nz_local + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -232,7 +305,7 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1; + ngrid = (size_buffer - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -244,61 +317,77 @@ int Grid3D::Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real Real *send_buffer_d; send_buffer_d = buffer; - hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d ); - CHECK(cudaDeviceSynchronize()); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, + n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, send_buffer_d); + GPU_Error_Check(cudaDeviceSynchronize()); return size_buffer; } -__global__ void Unload_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Unload_Transfer_Buffer_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int 
ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) { + return; + } - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if (side == 0) { + tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) { + tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + } } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential + 
tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) { + tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + } + if (side == 1) { + tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; + } } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; - } - -void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ){ - +void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start) +{ // printf( "Loading Gravity Buffer: Dir %d side: %d \n", direction, side ); - int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid;; + int nx_pot, ny_pot, nz_pot, size_buffer, n_ghost_potential, n_ghost_transfer, n_i, n_j, ngrid; + ; n_ghost_potential = N_GHOST_POTENTIAL; n_ghost_transfer = N_GHOST_POTENTIAL; - nx_pot = Grav.nx_local + 2*n_ghost_potential; - ny_pot = Grav.ny_local + 2*n_ghost_potential; - nz_pot = Grav.nz_local + 2*n_ghost_potential; + nx_pot = Grav.nx_local + 2 * n_ghost_potential; + ny_pot = Grav.ny_local + 2 * n_ghost_potential; + nz_pot = Grav.nz_local + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -306,7 +395,7 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_GRAV + 1; + ngrid = (size_buffer - 1) / TPB_GRAV + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -318,9 +407,8 @@ void Grid3D::Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *recv_buffer_d; recv_buffer_d = buffer; - hipLaunchKernelGGL( Unload_Transfer_Buffer_GPU_kernel, 
dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, recv_buffer_d ); - + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, + n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, recv_buffer_d); } - -#endif //GRAVITY +#endif // GRAVITY diff --git a/src/gravity/gravity_functions.cpp b/src/gravity/gravity_functions.cpp index ed5b0ba87..744f55825 100644 --- a/src/gravity/gravity_functions.cpp +++ b/src/gravity/gravity_functions.cpp @@ -1,31 +1,29 @@ #ifdef GRAVITY -#include "../grid/grid3D.h" -#include "../global/global.h" -#include "../io/io.h" -#include "../utils/error_handling.h" -#include + #include -#ifdef CUDA -#include "../mpi/cuda_mpi_routines.h" -#endif + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../mpi/cuda_mpi_routines.h" + #include "../utils/error_handling.h" -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif - -#if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) -#include -#endif + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif -#ifdef PARTICLES -#include "../model/disk_galaxy.h" -#endif + #if defined(PARIS_TEST) || defined(PARIS_GALACTIC_TEST) + #include + #endif -//Set delta_t when using gravity -void Grid3D::set_dt_Gravity(){ + // #ifdef PARTICLES + #include "../model/disk_galaxy.h" +// #endif - //Delta_t for the hydro +// Set delta_t when usi#ng gravity +void Grid3D::set_dt_Gravity() +{ + // Delta_t for the hydro Real dt_hydro = H.dt; #ifdef AVERAGE_SLOW_CELLS @@ -33,178 +31,182 @@ void Grid3D::set_dt_Gravity(){ #endif #ifdef PARTICLES - //Compute delta_t for particles and choose min(dt_particles, dt_hydro) + // Compute delta_t for particles and choose min(dt_particles, dt_hydro) Real dt_particles, dt_min; - #ifdef COSMOLOGY - chprintf( "Current_z: %f \n", Cosmo.current_z ); + 
#ifdef COSMOLOGY + chprintf("Current_z: %f \n", Cosmo.current_z); Real da_particles, da_min, dt_physical; - //Compute the particles delta_t + // Compute the particles delta_t Particles.dt = Calc_Particles_dt_Cosmo(); dt_particles = Particles.dt; - //Convert delta_t to delta_a ( a = scale factor ) - da_particles = Cosmo.Get_da_from_dt( dt_particles ); - da_particles = fmin( da_particles, 1.0 ); //Limit delta_a + // Convert delta_t to delta_a ( a = scale factor ) + da_particles = Cosmo.Get_da_from_dt(dt_particles); + da_particles = fmin(da_particles, 1.0); // Limit delta_a - #ifdef ONLY_PARTICLES - //If only particles da_min is only da_particles + #ifdef ONLY_PARTICLES + // If only particles da_min is only da_particles da_min = da_particles; - chprintf( " Delta_a_particles: %f \n", da_particles ); + chprintf(" Delta_a_particles: %f \n", da_particles); - #else //NOT ONLY_PARTICLES - //Here da_min is the minumum between da_particles and da_hydro + #else // NOT ONLY_PARTICLES + // Here da_min is the minumum between da_particles and da_hydro Real da_hydro; - da_hydro = Cosmo.Get_da_from_dt( dt_hydro ) * Cosmo.current_a * Cosmo.current_a / Cosmo.H0; //Convet delta_t to delta_a - da_min = fmin( da_hydro, da_particles ); //Find the minumum delta_a - chprintf( " Delta_a_particles: %f Delta_a_gas: %f \n", da_particles, da_hydro ); + da_hydro = + Cosmo.Get_da_from_dt(dt_hydro) * Cosmo.current_a * Cosmo.current_a / Cosmo.H0; // Convet delta_t to delta_a + da_min = fmin(da_hydro, da_particles); // Find the minumum delta_a + chprintf(" Delta_a_particles: %f Delta_a_gas: %f \n", da_particles, da_hydro); - #endif//ONLY_PARTICLES + #endif // ONLY_PARTICLES - //Limit delta_a by the expansion rate - Cosmo.max_delta_a = fmin( MAX_EXPANSION_RATE * Cosmo.current_a, MAX_DELTA_A ); - if( da_min > Cosmo.max_delta_a){ + // Limit delta_a by the expansion rate + Cosmo.max_delta_a = fmin(MAX_EXPANSION_RATE * Cosmo.current_a, MAX_DELTA_A); + if (da_min > Cosmo.max_delta_a) { da_min = 
Cosmo.max_delta_a; - chprintf( " Seting max delta_a: %f\n", da_min ); + chprintf(" Seting max delta_a: %f\n", da_min); } - //Small delta_a when reionization starts - #ifdef COOLING_GRACKLE - if ( fabs(Cosmo.current_a + da_min - Cool.scale_factor_UVB_on) < 0.005 ){ + // Small delta_a when reionization starts + #ifdef COOLING_GRACKLE + if (fabs(Cosmo.current_a + da_min - Cool.scale_factor_UVB_on) < 0.005) { da_min /= 2; - chprintf( " Starting UVB. Limiting delta_a: %f \n", da_min); + chprintf(" Starting UVB. Limiting delta_a: %f \n", da_min); } - #endif - #ifdef CHEMISTRY_GPU - if ( fabs(Cosmo.current_a + da_min - Chem.scale_factor_UVB_on) < 0.005 ){ + #endif + #ifdef CHEMISTRY_GPU + if (fabs(Cosmo.current_a + da_min - Chem.scale_factor_UVB_on) < 0.005) { da_min /= 2; - chprintf( " Starting UVB. Limiting delta_a: %f \n", da_min); + chprintf(" Starting UVB. Limiting delta_a: %f \n", da_min); } - #endif - - //Limit delta_a if it's time to output - if ( (Cosmo.current_a + da_min) > Cosmo.next_output ){ - da_min = Cosmo.next_output - Cosmo.current_a; + #endif + + // Limit delta_a if it's time to output + if ((Cosmo.current_a + da_min) > Cosmo.next_output) { + da_min = Cosmo.next_output - Cosmo.current_a; H.Output_Now = true; } - #ifdef ANALYSIS - //Limit delta_a if it's time to run analysis - if( Analysis.next_output_indx < Analysis.n_outputs ){ - if ( H.Output_Now && fabs(Cosmo.current_a + da_min - Analysis.next_output ) < 1e-6 ) Analysis.Output_Now = true; - else if ( Cosmo.current_a + da_min > Analysis.next_output ){ - da_min = Analysis.next_output - Cosmo.current_a; + #ifdef ANALYSIS + // Limit delta_a if it's time to run analysis + if (Analysis.next_output_indx < Analysis.n_outputs) { + if (H.Output_Now && fabs(Cosmo.current_a + da_min - Analysis.next_output) < 1e-6) + Analysis.Output_Now = true; + else if (Cosmo.current_a + da_min > Analysis.next_output) { + da_min = Analysis.next_output - Cosmo.current_a; Analysis.Output_Now = true; } } - #endif - - if ( da_min < 
0 ){ - chprintf( "ERROR: Negative delta_a"); + #endif + + if (da_min < 0) { + chprintf("ERROR: Negative delta_a"); exit(-1); - } - - - //Set delta_a after it has been computed + } + + // Set delta_a after it has been computed Cosmo.delta_a = da_min; - //Convert delta_a back to delta_t - dt_min = Cosmo.Get_dt_from_da( Cosmo.delta_a ) * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ); - //Set the new delta_t for the hydro step + // Convert delta_a back to delta_t + dt_min = Cosmo.Get_dt_from_da(Cosmo.delta_a) * Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a); + // Set the new delta_t for the hydro step H.dt = dt_min; - chprintf( " Current_a: %f delta_a: %f dt: %f\n", Cosmo.current_a, Cosmo.delta_a, H.dt ); + chprintf(" Current_a: %f delta_a: %f dt: %f\n", Cosmo.current_a, Cosmo.delta_a, H.dt); - #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell - da_particles = fmin( da_particles, Cosmo.max_delta_a ); - min_dt_slow = Cosmo.Get_dt_from_da( da_particles ) / Particles.C_cfl * Cosmo.H0 / ( Cosmo.current_a * Cosmo.current_a ) / SLOW_FACTOR; + #ifdef AVERAGE_SLOW_CELLS + // Set the min_delta_t for averaging a slow cell + da_particles = fmin(da_particles, Cosmo.max_delta_a); + min_dt_slow = Cosmo.Get_dt_from_da(da_particles) / Particles.C_cfl * Cosmo.H0 / (Cosmo.current_a * Cosmo.current_a) / + SLOW_FACTOR; H.min_dt_slow = min_dt_slow; - #endif + #endif - //Compute the physical time - dt_physical = Cosmo.Get_dt_from_da( Cosmo.delta_a ); + // Compute the physical time + dt_physical = Cosmo.Get_dt_from_da(Cosmo.delta_a); Cosmo.dt_secs = dt_physical * Cosmo.time_conversion; Cosmo.t_secs += Cosmo.dt_secs; - chprintf( " t_physical: %f Myr dt_physical: %f Myr\n", Cosmo.t_secs/MYR, Cosmo.dt_secs/MYR ); + chprintf(" t_physical: %f Myr dt_physical: %f Myr\n", Cosmo.t_secs / MYR, Cosmo.dt_secs / MYR); Particles.dt = dt_physical; - #else // Not Cosmology - //If NOT using COSMOLOGY + #else // Not Cosmology + // If NOT using COSMOLOGY - //Compute the particles 
delta_t + // Compute the particles delta_t dt_particles = Calc_Particles_dt(); - dt_particles = fmin( dt_particles, Particles.max_dt); - #ifdef ONLY_PARTICLES + dt_particles = fmin(dt_particles, Particles.max_dt); + #ifdef ONLY_PARTICLES dt_min = dt_particles; - chprintf( " dt_particles: %f \n", dt_particles ); - #else - chprintf( " dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles ); - //Get the minimum delta_t between hydro and particles - dt_min = fmin( dt_hydro, dt_particles ); - #endif//ONLY_PARTICLES - - #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell - min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + chprintf(" dt_particles: %f \n", dt_particles); + #else + chprintf(" dt_hydro: %f dt_particles: %f \n", dt_hydro, dt_particles); + // Get the minimum delta_t between hydro and particles + dt_min = fmin(dt_hydro, dt_particles); + #endif // ONLY_PARTICLES + + #ifdef AVERAGE_SLOW_CELLS + // Set the min_delta_t for averaging a slow cell + // min_dt_slow = dt_particles / Particles.C_cfl / SLOW_FACTOR; + min_dt_slow = 3 * H.dx; H.min_dt_slow = min_dt_slow; - #endif + #endif - //Set the new delta_t - H.dt = dt_min; + // Set the new delta_t + H.dt = dt_min; Particles.dt = H.dt; - #endif//COSMOLOGY - #endif//PARTICLES - - #if defined( AVERAGE_SLOW_CELLS) && !defined( PARTICLES ) - //Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is set to a large value, change this with your condition ) - min_dt_slow = H.dt / C_cfl * 100 ; + #endif // COSMOLOGY + #endif // PARTICLES + + #if defined(AVERAGE_SLOW_CELLS) && !defined(PARTICLES) + // Set the min_delta_t for averaging a slow cell ( for now the min_dt_slow is + // set to a large value, change this with your condition ) min_dt_slow = H.dt + // / C_cfl * 100 ; + min_dt_slow = 3 * H.dx; H.min_dt_slow = min_dt_slow; #endif // Set current and previous delta_t for the potential extrapolation - if ( Grav.INITIAL ){ + if (Grav.INITIAL) { Grav.dt_prev = H.dt; - 
Grav.dt_now = H.dt; - }else{ + Grav.dt_now = H.dt; + } else { Grav.dt_prev = Grav.dt_now; - Grav.dt_now = H.dt; + Grav.dt_now = H.dt; } - + #if defined(PARTICLES_GPU) && defined(PRINT_MAX_MEMORY_USAGE) Particles.Print_Max_Memory_Usage(); #endif } -//NOT USED: Get Average density on the Global dommain -Real Grav3D::Get_Average_Density(){ - +// NOT USED: Get Average density on the Global dommain +Real Grav3D::Get_Average_Density() +{ Real dens_sum, dens_mean; #ifndef PARALLEL_OMP - dens_sum = Get_Average_Density_function( 0, nz_local ); + dens_sum = Get_Average_Density_function(0, nz_local); #else dens_sum = 0; Real dens_sum_all[N_OMP_THREADS]; - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( nz_local, n_omp_procs, omp_id, &g_start, &g_end ); - dens_sum_all[omp_id] = Get_Average_Density_function( g_start, g_end ); - + Get_OMP_Grid_Indxs(nz_local, n_omp_procs, omp_id, &g_start, &g_end); + dens_sum_all[omp_id] = Get_Average_Density_function(g_start, g_end); } - for ( int i=0; i -1) { - #endif - const int k = nz/2; - for (int j = 0; j < ny+ng+ng; j++) { - for (int i = 0; i < nx+ng+ng; i++) { - const long ijk = i+(nx+ng+ng)*(j+(ny+ng+ng)*(k+ng)); - printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); - } - printf("\n"); - } - #if 0 + #endif + const int k = nz / 2; + for (int j = 0; j < ny + ng + ng; j++) { + for (int i = 0; i < nx + ng + ng; i++) { + const long ijk = i + (nx + ng + ng) * (j + (ny + ng + ng) * (k + ng)); + printf("%d %d %g %g %g\n", j, i, q[ijk], p[ijk], q[ijk] - p[ijk]); + } + printf("\n"); + } + #if 0 break; } } - #endif + #endif fflush(stdout); MPI_Finalize(); exit(0); } -#endif - - + #endif -//Initialize the Grav Object at the beginning of the simulation -void Grid3D::Initialize_Gravity( struct parameters *P ){ - chprintf( 
"\nInitializing Gravity... \n"); - Grav.Initialize( H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, H.n_ghost_potential_offset, P ); - chprintf( "Gravity Successfully Initialized. \n\n"); +// Initialize the Grav Object at the beginning of the simulation +void Grid3D::Initialize_Gravity(struct Parameters *P) +{ + chprintf("\nInitializing Gravity... \n"); + Grav.Initialize(H.xblocal, H.yblocal, H.zblocal, H.xblocal_max, H.yblocal_max, H.zblocal_max, H.xdglobal, H.ydglobal, + H.zdglobal, P->nx, P->ny, P->nz, H.nx_real, H.ny_real, H.nz_real, H.dx, H.dy, H.dz, + H.n_ghost_potential_offset, P); + chprintf("Gravity Successfully Initialized. \n\n"); if (P->bc_potential_type == 1) { + const int ng = N_GHOST_POTENTIAL; + const int twoNG = ng + ng; + const int nk = Grav.nz_local + twoNG; + const int nj = Grav.ny_local + twoNG; + const int ni = Grav.nx_local + twoNG; + const Real dr = 0.5 - ng; - const int ng = N_GHOST_POTENTIAL; - const int twoNG = ng+ng; - const int nk = Grav.nz_local+twoNG; - const int nj = Grav.ny_local+twoNG; - const int ni = Grav.nx_local+twoNG; - const Real dr = 0.5-ng; - - #ifdef PARIS_GALACTIC_TEST + #ifdef PARIS_GALACTIC_TEST chprintf("Analytic Test of Poisson Solvers:\n"); std::vector exact(Grav.n_cells_potential); std::vector potential(Grav.n_cells_potential); - const Real scale = 4.0*M_PI*Grav.Gconst; - const Real ddx = 1.0/(scale*Grav.dx*Grav.dx); - const Real ddy = 1.0/(scale*Grav.dy*Grav.dy); - const Real ddz = 1.0/(scale*Grav.dz*Grav.dz); + const Real scale = 4.0 * M_PI * Grav.Gconst; + const Real ddx = 1.0 / (scale * Grav.dx * Grav.dx); + const Real ddy = 1.0 / (scale * Grav.dy * Grav.dy); + const Real ddz = 1.0 / (scale * Grav.dz * Grav.dz); const Real *const phi = Grav.F.potential_h; - const int nij = ni*nj; - const Real a0 = Galaxies::MW.phi_disk_D3D(0,0); - const Real da0 = 2.0/(25.0*scale); + const int 
nij = ni * nj; + const Real a0 = galaxies::MW.phi_disk_D3D(0, 0); + const Real da0 = 2.0 / (25.0 * scale); #pragma omp parallel for for (int k = 0; k < nk; k++) { - const Real z = Grav.zMin+Grav.dz*(k+dr); - const int njk = nj*k; + const Real z = Grav.zMin + Grav.dz * (k + dr); + const int njk = nj * k; for (int j = 0; j < nj; j++) { - const Real y = Grav.yMin+Grav.dy*(j+dr); - const Real yy = y*y; - const int nijk = ni*(j+njk); + const Real y = Grav.yMin + Grav.dy * (j + dr); + const Real yy = y * y; + const int nijk = ni * (j + njk); for (int i = 0; i < ni; i++) { - const Real x = Grav.xMin+Grav.dx*(i+dr); - const Real r = sqrt(x*x+yy); - const int ijk = i+nijk; - exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r,z); + const Real x = Grav.xMin + Grav.dx * (i + dr); + const Real r = sqrt(x * x + yy); + const int ijk = i + nijk; + exact[ijk] = potential[ijk] = Grav.F.potential_h[ijk] = galaxies::MW.phi_disk_D3D(r, z); } } } #pragma omp parallel for for (int k = 0; k < Grav.nz_local; k++) { - const Real z = Grav.zMin+Grav.dz*(k+0.5); - const Real zz = z*z; - const int njk = Grav.ny_local*k; + const Real z = Grav.zMin + Grav.dz * (k + 0.5); + const Real zz = z * z; + const int njk = Grav.ny_local * k; for (int j = 0; j < Grav.ny_local; j++) { - const Real y = Grav.yMin+Grav.dy*(j+0.5); - const Real yy = y*y; - const int nijk = Grav.nx_local*(j+njk); + const Real y = Grav.yMin + Grav.dy * (j + 0.5); + const Real yy = y * y; + const int nijk = Grav.nx_local * (j + njk); for (int i = 0; i < Grav.nx_local; i++) { - const Real x = Grav.xMin+Grav.dx*(i+0.5); - const Real r = sqrt(x*x+yy); - const int ijk = i+nijk; - const Real rr = x*x+yy+zz; - const Real f = a0*exp(-0.2*rr); - const Real df = da0*(15.0-2.0*rr)*f; - Grav.F.density_h[ijk] = Galaxies::MW.rho_disk_D3D(r,z)+df; - const int ib = i+ng+ni*(j+ng+nj*(k+ng)); + const Real x = Grav.xMin + Grav.dx * (i + 0.5); + const Real r = sqrt(x * x + yy); + const int ijk = i + nijk; + const Real 
rr = x * x + yy + zz; + const Real f = a0 * exp(-0.2 * rr); + const Real df = da0 * (15.0 - 2.0 * rr) * f; + Grav.F.density_h[ijk] = galaxies::MW.rho_disk_D3D(r, z) + df; + const int ib = i + ng + ni * (j + ng + nj * (k + ng)); exact[ib] -= f; } } } - Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h,Grav.F.potential_h,Grav.Gconst,Galaxies::MW); + Grav.Poisson_solver_test.Get_Potential(Grav.F.density_h, Grav.F.potential_h, Grav.Gconst, galaxies::MW); chprintf(" Paris Galactic"); - printDiff(Grav.F.potential_h,exact.data(),Grav.nx_local,Grav.ny_local,Grav.nz_local); - Get_Potential_SOR(Grav.Gconst,0,0,P); + printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local); + Get_Potential_SOR(Grav.Gconst, 0, 0, P); chprintf(" SOR"); - printDiff(Grav.F.potential_h,exact.data(),Grav.nx_local,Grav.ny_local,Grav.nz_local); - #endif + printDiff(Grav.F.potential_h, exact.data(), Grav.nx_local, Grav.ny_local, Grav.nz_local); + #endif - #ifdef SOR + #ifdef SOR chprintf(" Initializing disk analytic potential\n"); #pragma omp parallel for for (int k = 0; k < nk; k++) { - const Real z = Grav.zMin+Grav.dz*(k+dr); - const int njk = nj*k; + const Real z = Grav.zMin + Grav.dz * (k + dr); + const int njk = nj * k; for (int j = 0; j < nj; j++) { - const Real y = Grav.yMin+Grav.dy*(j+dr); - const Real yy = y*y; - const int nijk = ni*(j+njk); + const Real y = Grav.yMin + Grav.dy * (j + dr); + const Real yy = y * y; + const int nijk = ni * (j + njk); for (int i = 0; i < ni; i++) { - const Real x = Grav.xMin+Grav.dx*(i+dr); - const Real r = sqrt(x*x+yy); - const int ijk = i+nijk; - Grav.F.potential_h[ijk] = Galaxies::MW.phi_disk_D3D(r,z); + const Real x = Grav.xMin + Grav.dx * (i + dr); + const Real r = sqrt(x * x + yy); + const int ijk = i + nijk; + Grav.F.potential_h[ijk] = galaxies::MW.phi_disk_D3D(r, z); } } } - #endif + #endif } } - -//Compute the Gravitational Potential by solving Poisson Equation -void Grid3D::Compute_Gravitational_Potential( struct 
parameters *P ){ - +// Compute the Gravitational Potential by solving Poisson Equation +void Grid3D::Compute_Gravitational_Potential(struct Parameters *P) +{ #ifdef CPU_TIME Timer.Grav_Potential.Start(); #endif #ifdef PARTICLES - //Copy the particles density to the grav_density array - Copy_Particles_Density_to_Gravity( *P ); + // Copy the particles density to the grav_density array + Copy_Particles_Density_to_Gravity(*P); #endif #ifndef ONLY_PARTICLES - //Copy the hydro density to the grav_density array + // Copy the hydro density to the grav_density array Copy_Hydro_Density_to_Gravity(); #endif #ifdef COSMOLOGY - //If using cosmology, set the gravitational constant to the one in the correct units + // If using cosmology, set the gravitational constant to the one in the + // correct units const Real Grav_Constant = Cosmo.cosmo_G; - const Real current_a = Cosmo.current_a; - const Real dens_avrg = Cosmo.rho_0_gas; + const Real current_a = Cosmo.current_a; + const Real dens_avrg = Cosmo.rho_0_gas; #else const Real Grav_Constant = Grav.Gconst; // If slowing the Sphere Collapse problem ( bc_potential_type=0 ) const Real dens_avrg = (P->bc_potential_type == 0) ? 
H.sphere_background_density : 0; - const Real r0 = H.sphere_radius; + const Real r0 = H.sphere_radius; // Re-use current_a as the total mass of the sphere - const Real current_a = (H.sphere_density-dens_avrg)*4.0*M_PI*r0*r0*r0/3.0; + const Real current_a = (H.sphere_density - dens_avrg) * 4.0 * M_PI * r0 * r0 * r0 / 3.0; #endif - if ( !Grav.BC_FLAGS_SET ){ + if (!Grav.BC_FLAGS_SET) { Grav.TRANSFER_POTENTIAL_BOUNDARIES = true; - Set_Boundary_Conditions( *P ); + Set_Boundary_Conditions(*P); Grav.TRANSFER_POTENTIAL_BOUNDARIES = false; // #ifdef MPI_CHOLLA - // printf(" Pid: %d Gravity Boundary Flags: %d %d %d %d %d %d \n", procID, Grav.boundary_flags[0], Grav.boundary_flags[1], Grav.boundary_flags[2], Grav.boundary_flags[3], Grav.boundary_flags[4], Grav.boundary_flags[5] ); + // printf(" Pid: %d Gravity Boundary Flags: %d %d %d %d %d %d \n", procID, + // Grav.boundary_flags[0], Grav.boundary_flags[1], Grav.boundary_flags[2], + // Grav.boundary_flags[3], Grav.boundary_flags[4], Grav.boundary_flags[5] ); // #endif Grav.BC_FLAGS_SET = true; } #ifdef GRAV_ISOLATED_BOUNDARY_X - if ( Grav.boundary_flags[0] == 3 ) Compute_Potential_Boundaries_Isolated(0, P); - if ( Grav.boundary_flags[1] == 3 ) Compute_Potential_Boundaries_Isolated(1, P); + if (Grav.boundary_flags[0] == 3) { + Compute_Potential_Boundaries_Isolated(0, P); + } + if (Grav.boundary_flags[1] == 3) { + Compute_Potential_Boundaries_Isolated(1, P); + } // chprintf("Isolated X\n"); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - if ( Grav.boundary_flags[2] == 3 ) Compute_Potential_Boundaries_Isolated(2, P); - if ( Grav.boundary_flags[3] == 3 ) Compute_Potential_Boundaries_Isolated(3, P); + if (Grav.boundary_flags[2] == 3) { + Compute_Potential_Boundaries_Isolated(2, P); + } + if (Grav.boundary_flags[3] == 3) { + Compute_Potential_Boundaries_Isolated(3, P); + } // chprintf("Isolated Y\n"); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - if ( Grav.boundary_flags[4] == 3 ) Compute_Potential_Boundaries_Isolated(4, P); - if ( 
Grav.boundary_flags[5] == 3 ) Compute_Potential_Boundaries_Isolated(5, P); + if (Grav.boundary_flags[4] == 3) { + Compute_Potential_Boundaries_Isolated(4, P); + } + if (Grav.boundary_flags[5] == 3) { + Compute_Potential_Boundaries_Isolated(5, P); + } // chprintf("Isolated Z\n"); #endif - //Solve Poisson Equation to compute the potential - //Poisson Equation: laplacian( phi ) = 4 * pi * G / scale_factor * ( dens - dens_average ) + // Solve Poisson Equation to compute the potential + // Poisson Equation: laplacian( phi ) = 4 * pi * G / scale_factor * ( dens - + // dens_average ) Real *input_density, *output_potential; #ifdef GRAVITY_GPU - input_density = Grav.F.density_d; + input_density = Grav.F.density_d; output_potential = Grav.F.potential_d; #else - input_density = Grav.F.density_h; + input_density = Grav.F.density_h; output_potential = Grav.F.potential_h; #endif #ifdef SOR - #ifdef PARIS_GALACTIC_TEST - #ifdef GRAVITY_GPU - #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST" - #endif - Grav.Poisson_solver_test.Get_Potential(input_density,output_potential,Grav_Constant,Galaxies::MW); - std::vector p(output_potential,output_potential+Grav.n_cells_potential); - Get_Potential_SOR( Grav_Constant, dens_avrg, current_a, P ); + #ifdef PARIS_GALACTIC_TEST + #ifdef GRAVITY_GPU + #error "GRAVITY_GPU not yet supported with PARIS_GALACTIC_TEST" + #endif + Grav.Poisson_solver_test.Get_Potential(input_density, output_potential, Grav_Constant, galaxies::MW); + std::vector p(output_potential, output_potential + Grav.n_cells_potential); + Get_Potential_SOR(Grav_Constant, dens_avrg, current_a, P); chprintf(" Paris vs SOR"); - printDiff(p.data(),output_potential,Grav.nx_local,Grav.ny_local,Grav.nz_local,N_GHOST_POTENTIAL,false); - #else - Get_Potential_SOR( Grav_Constant, dens_avrg, current_a, P ); - #endif + printDiff(p.data(), output_potential, Grav.nx_local, Grav.ny_local, Grav.nz_local, N_GHOST_POTENTIAL, false); + #else + Get_Potential_SOR(Grav_Constant, dens_avrg, 
current_a, P); + #endif #elif defined PARIS_GALACTIC - Grav.Poisson_solver.Get_Potential(input_density,output_potential,Grav_Constant,Galaxies::MW); + Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, galaxies::MW); #else - Grav.Poisson_solver.Get_Potential( input_density, output_potential, Grav_Constant, dens_avrg, current_a); - #endif//SOR + Grav.Poisson_solver.Get_Potential(input_density, output_potential, Grav_Constant, dens_avrg, current_a); + #endif // SOR #ifdef CPU_TIME Timer.Grav_Potential.End(); #endif - } -#ifdef GRAVITY_ANALYTIC_COMP -void Grid3D::Add_Analytic_Potential(struct parameters *P) { - #ifndef PARALLEL_OMP - Add_Analytic_Galaxy_Potential(0, Grav.nz_local, Galaxies::MW); - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifdef GRAVITY_ANALYTIC_COMP +void Grid3D::Setup_Analytic_Potential(struct Parameters *P) +{ + #ifndef PARALLEL_OMP + Setup_Analytic_Galaxy_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL, galaxies::MW); + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end); - Add_Analytic_Galaxy_Potential(g_start, g_end, Galaxies::MW); + Setup_Analytic_Galaxy_Potential(g_start, g_end, galaxies::MW); } - #endif + #endif + + #ifdef GRAVITY_GPU + GPU_Error_Check(cudaMemcpy(Grav.F.analytic_potential_d, Grav.F.analytic_potential_h, + Grav.n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + #endif } -#endif +void Grid3D::Add_Analytic_Potential() +{ + #ifdef GRAVITY_GPU + Add_Analytic_Potential_GPU(); + #else + #ifndef PARALLEL_OMP + Add_Analytic_Potential(0, Grav.nz_local + 2 * N_GHOST_POTENTIAL); + #else + #pragma omp parallel num_threads(N_OMP_THREADS) + { + int 
omp_id, n_omp_procs; + int g_start, g_end; -void Grid3D::Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end){ + omp_id = omp_get_thread_num(); + n_omp_procs = omp_get_num_threads(); + Get_OMP_Grid_Indxs(Grav.nz_local + 2 * N_GHOST_POTENTIAL, n_omp_procs, omp_id, &g_start, &g_end); + + Add_Analytic_Potential(g_start, g_end); + } + #endif // PARALLEL_OMP + #endif // GRAVITY_GPU else +} + #endif // GRAVITY_ANALYTIC_COMP + +void Grid3D::Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end) +{ // Copy the density array from hydro conserved to gravity density array Real dens; int i, j, k, id, id_grav; - for (k=g_start; k + #include + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/error_handling.h" -void Grav3D::AllocateMemory_GPU(){ - - CudaSafeCall( cudaMalloc((void**)&F.density_d, n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.potential_d, n_cells_potential*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.potential_1_d, n_cells_potential*sizeof(Real)) ); +void Grav3D::AllocateMemory_GPU() +{ + GPU_Error_Check(cudaMalloc((void **)&F.density_d, n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.potential_d, n_cells_potential * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.potential_1_d, n_cells_potential * sizeof(Real))); #ifdef GRAVITY_GPU - #ifdef GRAV_ISOLATED_BOUNDARY_X - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL*ny_local*nz_local*sizeof(Real)) ); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL*nx_local*nz_local*sizeof(Real)) ); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_z0_d, 
N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL*nx_local*ny_local*sizeof(Real)) ); - #endif - - #endif//GRAVITY_GPU - - chprintf( "Allocated Gravity GPU memory \n" ); + #ifdef GRAVITY_ANALYTIC_COMP + GPU_Error_Check(cudaMalloc((void **)&F.analytic_potential_d, n_cells_potential * sizeof(Real))); + #endif + + #ifdef GRAV_ISOLATED_BOUNDARY_X + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_x0_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_x1_d, N_GHOST_POTENTIAL * ny_local * nz_local * sizeof(Real))); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_y0_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_y1_d, N_GHOST_POTENTIAL * nx_local * nz_local * sizeof(Real))); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_z0_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F.pot_boundary_z1_d, N_GHOST_POTENTIAL * nx_local * ny_local * sizeof(Real))); + #endif + + #endif // GRAVITY_GPU + + chprintf("Allocated Gravity GPU memory \n"); } - -void Grav3D::FreeMemory_GPU(void){ - - cudaFree( F.density_d ); - cudaFree( F.potential_d ); - cudaFree( F.potential_1_d ); - +void Grav3D::FreeMemory_GPU(void) +{ + cudaFree(F.density_d); + cudaFree(F.potential_d); + cudaFree(F.potential_1_d); #ifdef GRAVITY_GPU - #ifdef GRAV_ISOLATED_BOUNDARY_X - cudaFree( F.pot_boundary_x0_d); - cudaFree( F.pot_boundary_x1_d); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Y - cudaFree( F.pot_boundary_y0_d); - cudaFree( F.pot_boundary_y1_d); - #endif - #ifdef GRAV_ISOLATED_BOUNDARY_Z - cudaFree( F.pot_boundary_z0_d); - cudaFree( F.pot_boundary_z1_d); - #endif - - #endif //GRAVITY_GPU - + #ifdef GRAVITY_ANALYTIC_COMP + cudaFree(F.analytic_potential_d); + 
#endif + + #ifdef GRAV_ISOLATED_BOUNDARY_X + cudaFree(F.pot_boundary_x0_d); + cudaFree(F.pot_boundary_x1_d); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Y + cudaFree(F.pot_boundary_y0_d); + cudaFree(F.pot_boundary_y1_d); + #endif + #ifdef GRAV_ISOLATED_BOUNDARY_Z + cudaFree(F.pot_boundary_z0_d); + cudaFree(F.pot_boundary_z1_d); + #endif + + #endif // GRAVITY_GPU } -void __global__ Copy_Hydro_Density_to_Gravity_Kernel( Real *src_density_d, Real *dst_density_d, int nx_local, int ny_local, int nz_local, int n_ghost, Real cosmo_rho_0_gas ){ - +void __global__ Copy_Hydro_Density_to_Gravity_Kernel(Real *src_density_d, Real *dst_density_d, int nx_local, + int ny_local, int nz_local, int n_ghost, Real cosmo_rho_0_gas) +{ int tid_x, tid_y, tid_z, tid_grid, tid_dens; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local ) return; + if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) { + return; + } - tid_dens = tid_x + tid_y*nx_local + tid_z*nx_local*ny_local; + tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local; tid_x += n_ghost; tid_y += n_ghost; tid_z += n_ghost; int nx_grid, ny_grid; - nx_grid = nx_local + 2*n_ghost; - ny_grid = ny_local + 2*n_ghost; - tid_grid = tid_x + tid_y*nx_grid + tid_z*nx_grid*ny_grid; + nx_grid = nx_local + 2 * n_ghost; + ny_grid = ny_local + 2 * n_ghost; + tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid; Real dens; dens = src_density_d[tid_grid]; @@ -88,27 +96,24 @@ void __global__ Copy_Hydro_Density_to_Gravity_Kernel( Real *src_density_d, Real #endif #ifdef PARTICLES - dst_density_d[tid_dens] += dens; //Hydro density is added AFTER partices density + dst_density_d[tid_dens] += dens; // Hydro density is added AFTER partices density #else - dst_density_d[tid_dens] = dens; + dst_density_d[tid_dens] = dens; #endif - } -void 
Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){ - +void Grid3D::Copy_Hydro_Density_to_Gravity_GPU() +{ int nx_local, ny_local, nz_local, n_ghost; nx_local = Grav.nx_local; ny_local = Grav.ny_local; nz_local = Grav.nz_local; n_ghost = H.n_ghost; - - // set values for GPU kernels - int tpb_x = TPBX_GRAV; - int tpb_y = TPBY_GRAV; - int tpb_z = TPBZ_GRAV; + int tpb_x = TPBX_GRAV; + int tpb_y = TPBY_GRAV; + int tpb_z = TPBZ_GRAV; int ngrid_x = (nx_local - 1) / tpb_x + 1; int ngrid_y = (ny_local - 1) / tpb_y + 1; int ngrid_z = (nz_local - 1) / tpb_z + 1; @@ -125,70 +130,132 @@ void Grid3D::Copy_Hydro_Density_to_Gravity_GPU(){ cosmo_rho_0_gas = 1.0; #endif - //Copy the density from the device array to the Poisson input density array - hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_density, Grav.F.density_d, nx_local, ny_local, nz_local, n_ghost, cosmo_rho_0_gas); + // Copy the density from the device array to the Poisson input density array + hipLaunchKernelGGL(Copy_Hydro_Density_to_Gravity_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_density, Grav.F.density_d, + nx_local, ny_local, nz_local, n_ghost, cosmo_rho_0_gas); +} + #if defined(GRAVITY_ANALYTIC_COMP) +void __global__ Add_Analytic_Potential_Kernel(Real *analytic_d, Real *potential_d, int nx_pot, int ny_pot, int nz_pot) +{ + int tid_x, tid_y, tid_z, tid; + tid_x = blockIdx.x * blockDim.x + threadIdx.x; + tid_y = blockIdx.y * blockDim.y + threadIdx.y; + tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) { + return; + } + + tid = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; + + potential_d[tid] += analytic_d[tid]; + /* + if (tid_x < 10 && tid_y == (ny_pot/2) && tid_z == (nz_pot/2)) { + //printf("potential_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, tid_z, + potential_d[tid]); printf("analytic_d[%d, %d, %d] = %.4e\n", tid_x, tid_y, + tid_z, analytic_d[tid]); + } + */ } -void __global__ Extrapolate_Grav_Potential_Kernel( Real 
*dst_potential, Real *src_potential_0, Real *src_potential_1, - int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, int nz_grid, int n_offset, - Real dt_now, Real dt_prev, bool INITIAL, Real cosmo_factor ){ +void Grid3D::Add_Analytic_Potential_GPU() +{ + int nx_pot, ny_pot, nz_pot; + nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL; + ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL; + nz_pot = Grav.nz_local + 2 * N_GHOST_POTENTIAL; + + // set values for GPU kernels + int tpb_x = TPBX_GRAV; + int tpb_y = TPBY_GRAV; + int tpb_z = TPBZ_GRAV; + + int ngrid_x = (nx_pot - 1) / tpb_x + 1; + int ngrid_y = (ny_pot - 1) / tpb_y + 1; + int ngrid_z = (nz_pot - 1) / tpb_z + 1; + + // number of blocks per 1D grid + dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); + // number of threads per 1D block + dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); + // Copy the analytic potential from the device array to the device potential + // array + hipLaunchKernelGGL(Add_Analytic_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.analytic_potential_d, + Grav.F.potential_d, nx_pot, ny_pot, nz_pot); + cudaDeviceSynchronize(); + /*gpuFor(10, + GPU_LAMBDA(const int i) { + printf("potential_after_analytic[%d, %d, %d] = %.4e\n", i, ny_pot/2, + nz_pot/2, Grav.F.potential_d[i + nx_pot*ny_pot/2 + nx_pot*ny_pot*nz_pot/2]); + } + );*/ +} + #endif // GRAVITY_ANALYTIC_COMP + +void __global__ Extrapolate_Grav_Potential_Kernel(Real *dst_potential, Real *src_potential_0, Real *src_potential_1, + int nx_pot, int ny_pot, int nz_pot, int nx_grid, int ny_grid, + int nz_grid, int n_offset, Real dt_now, Real dt_prev, bool INITIAL, + Real cosmo_factor) +{ int tid_x, tid_y, tid_z, tid_grid, tid_pot; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot ) return; + if (tid_x >= nx_pot || tid_y >= ny_pot || tid_z >= nz_pot) { + return; + } - tid_pot = tid_x + tid_y*nx_pot + 
tid_z*nx_pot*ny_pot; + tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; tid_x += n_offset; tid_y += n_offset; tid_z += n_offset; - tid_grid = tid_x + tid_y*nx_grid + tid_z*nx_grid*ny_grid; + tid_grid = tid_x + tid_y * nx_grid + tid_z * nx_grid * ny_grid; Real pot_now, pot_prev, pot_extrp; - pot_now = src_potential_0[tid_pot]; //Potential at the n-th timestep - if ( INITIAL ){ - pot_extrp = pot_now; //The first timestep the extrapolated potential is phi_0 + pot_now = src_potential_0[tid_pot]; // Potential at the n-th timestep + if (INITIAL) { + pot_extrp = pot_now; // The first timestep the extrapolated potential is phi_0 } else { - pot_prev = src_potential_1[tid_pot]; //Potential at the (n-1)-th timestep ( previous step ) - //Compute the extrapolated potential from phi_n-1 and phi_n - pot_extrp = pot_now + 0.5 * dt_now * ( pot_now - pot_prev ) / dt_prev; + pot_prev = src_potential_1[tid_pot]; // Potential at the (n-1)-th timestep + // ( previous step ) + // Compute the extrapolated potential from phi_n-1 and phi_n + pot_extrp = pot_now + 0.5 * dt_now * (pot_now - pot_prev) / dt_prev; } #ifdef COSMOLOGY - //For cosmological simulation the potential is transformed to 'comoving coordinates' + // For cosmological simulation the potential is transformed to 'comoving + // coordinates' pot_extrp *= cosmo_factor; #endif - //Save the extrapolated potential + // Save the extrapolated potential dst_potential[tid_grid] = pot_extrp; - //Set phi_n-1 = phi_n, to use it during the next step + // Set phi_n-1 = phi_n, to use it during the next step src_potential_1[tid_pot] = pot_now; } -void Grid3D::Extrapolate_Grav_Potential_GPU(){ - +void Grid3D::Extrapolate_Grav_Potential_GPU() +{ int nx_pot, ny_pot, nz_pot; - nx_pot = Grav.nx_local + 2*N_GHOST_POTENTIAL; - ny_pot = Grav.ny_local + 2*N_GHOST_POTENTIAL; - nz_pot = Grav.nz_local + 2*N_GHOST_POTENTIAL; + nx_pot = Grav.nx_local + 2 * N_GHOST_POTENTIAL; + ny_pot = Grav.ny_local + 2 * N_GHOST_POTENTIAL; + nz_pot = 
Grav.nz_local + 2 * N_GHOST_POTENTIAL; int n_ghost_grid, nx_grid, ny_grid, nz_grid; n_ghost_grid = H.n_ghost; - nx_grid = Grav.nx_local + 2*n_ghost_grid; - ny_grid = Grav.ny_local + 2*n_ghost_grid; - nz_grid = Grav.nz_local + 2*n_ghost_grid; + nx_grid = Grav.nx_local + 2 * n_ghost_grid; + ny_grid = Grav.ny_local + 2 * n_ghost_grid; + nz_grid = Grav.nz_local + 2 * n_ghost_grid; int n_offset = n_ghost_grid - N_GHOST_POTENTIAL; - Real dt_now, dt_prev, cosmo_factor; - dt_now = Grav.dt_now; + dt_now = Grav.dt_now; dt_prev = Grav.dt_prev; #ifdef COSMOLOGY @@ -198,9 +265,9 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){ #endif // set values for GPU kernels - int tpb_x = TPBX_GRAV; - int tpb_y = TPBY_GRAV; - int tpb_z = TPBZ_GRAV; + int tpb_x = TPBX_GRAV; + int tpb_y = TPBY_GRAV; + int tpb_z = TPBZ_GRAV; int ngrid_x = (nx_pot - 1) / tpb_x + 1; int ngrid_y = (ny_pot - 1) / tpb_y + 1; int ngrid_z = (nz_pot - 1) / tpb_z + 1; @@ -209,20 +276,18 @@ void Grid3D::Extrapolate_Grav_Potential_GPU(){ // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor ); - + hipLaunchKernelGGL(Extrapolate_Grav_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, C.d_Grav_potential, + Grav.F.potential_d, Grav.F.potential_1_d, nx_pot, ny_pot, nz_pot, nx_grid, ny_grid, nz_grid, + n_offset, dt_now, dt_prev, Grav.INITIAL, cosmo_factor); } -#ifdef PARTICLES_CPU -void Grid3D::Copy_Potential_From_GPU(){ - CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); + #ifdef PARTICLES_CPU +void Grid3D::Copy_Potential_From_GPU() +{ + GPU_Error_Check(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); 
cudaDeviceSynchronize(); } -#endif //PARTICLES_CPU - - - - - + #endif // PARTICLES_CPU -#endif //GRAVITY +#endif // GRAVITY diff --git a/src/gravity/gravity_restart.cpp b/src/gravity/gravity_restart.cpp new file mode 100644 index 000000000..d2a09e24d --- /dev/null +++ b/src/gravity/gravity_restart.cpp @@ -0,0 +1,102 @@ +// Special functions needed to make restart (init=Read_Grid) consistent with +// running continuously + +#include + +#ifdef GRAVITY + #include "../gravity/grav3D.h" + #include "../io/io.h" +#endif + +#ifdef MPI_CHOLLA +// provides procID + #include "../mpi/mpi_routines.h" +#endif // MPI_CHOLLA + +#ifdef HDF5 + #include +#endif + +void Gravity_Restart_Filename(char* filename, char* dirname, int nfile) +{ +#ifdef MPI_CHOLLA + sprintf(filename, "%s%d_gravity.h5.%d", dirname, nfile, procID); +#else + sprintf(filename, "%s%d_gravity.h5", dirname, nfile); +#endif +} + +#if defined(GRAVITY) && defined(HDF5) +void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile) +{ + H5open(); + char filename[MAXLEN]; + Gravity_Restart_Filename(filename, P->indir, nfile); + hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + + // Read dt_now + hid_t attribute_id = H5Aopen(file_id, "dt_now", H5P_DEFAULT); + herr_t status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); + status = H5Aclose(attribute_id); + + // Read potential and copy to device to be used as potential n-1 + Read_HDF5_Dataset(file_id, F.potential_1_h, "/potential"); + #ifdef GRAVITY_GPU + GPU_Error_Check( + cudaMemcpy(F.potential_1_d, F.potential_1_h, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + #endif + + H5Fclose(file_id); + H5close(); + + // Set INITIAL to false + INITIAL = false; +} + +void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile) +{ + H5open(); + std::string filename = FnameTemplate(*P).format_fname(nfile, "_gravity"); + hid_t file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + // Write dt_now + hsize_t 
attr_dims = 1; + hid_t dataspace_id = H5Screate_simple(1, &attr_dims, NULL); + + hid_t attribute_id = H5Acreate(file_id, "dt_now", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); + herr_t status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &dt_now); + status = H5Aclose(attribute_id); + + status = H5Sclose(dataspace_id); + + // Copy device to host if needed + #ifdef GRAVITY_GPU + GPU_Error_Check( + cudaMemcpy(F.potential_1_h, F.potential_1_d, n_cells_potential * sizeof(Real), cudaMemcpyDeviceToHost)); + #endif + + // Write potential + hsize_t dims[1]; + dims[0] = n_cells_potential; + + dataspace_id = H5Screate_simple(1, dims, NULL); + Write_HDF5_Dataset(file_id, dataspace_id, F.potential_1_h, "/potential"); + H5Sclose(dataspace_id); + + H5Fclose(file_id); + + H5close(); +} + +#elif defined(GRAVITY) +// Do nothing +void Grav3D::Read_Restart_HDF5(struct Parameters* P, int nfile) +{ + chprintf("WARNING from file %s line %d: Read_Restart_HDF5 did nothing", __FILE__, __LINE__); +} + +void Grav3D::Write_Restart_HDF5(struct Parameters* P, int nfile) +{ + chprintf("WARNING from file %s line %d: Write_Restart_HDF5 did nothing", __FILE__, __LINE__); +} +#endif diff --git a/src/gravity/paris/HenryPeriodic.cu b/src/gravity/paris/HenryPeriodic.cu index cf82c2d38..1602ca737 100644 --- a/src/gravity/paris/HenryPeriodic.cu +++ b/src/gravity/paris/HenryPeriodic.cu @@ -1,103 +1,103 @@ #ifdef PARIS -#include "HenryPeriodic.hpp" + #include + #include + #include + #include -#include -#include -#include -#include + #include "HenryPeriodic.hpp" -HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]): - idi_(id[0]), - idj_(id[1]), - idk_(id[2]), - mi_(m[0]), - mj_(m[1]), - mk_(m[2]), - nh_(n[2]/2+1), - ni_(n[0]), - nj_(n[1]), - nk_(n[2]), - bytes_(0) +HenryPeriodic::HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) + : idi_(id[0]), + idj_(id[1]), + idk_(id[2]), + 
mi_(m[0]), + mj_(m[1]), + mk_(m[2]), + nh_(n[2] / 2 + 1), + ni_(n[0]), + nj_(n[1]), + nk_(n[2]), + bytes_(0) { // Pencil sub-decomposition within a 3D block mq_ = int(round(sqrt(mk_))); - while (mk_%mq_) mq_--; - mp_ = mk_/mq_; - assert(mp_*mq_ == mk_); + while (mk_ % mq_) { + mq_--; + } + mp_ = mk_ / mq_; + assert(mp_ * mq_ == mk_); - idp_ = idk_/mq_; - idq_ = idk_%mq_; + idp_ = idk_ / mq_; + idq_ = idk_ % mq_; // Communicators of tasks within pencils in each dimension { - const int color = idi_*mj_+idj_; - const int key = idk_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commK_); + const int color = idi_ * mj_ + idj_; + const int key = idk_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commK_); } { - const int color = idi_*mp_+idp_; - const int key = idj_*mq_+idq_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commJ_); + const int color = idi_ * mp_ + idp_; + const int key = idj_ * mq_ + idq_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commJ_); } { - const int color = idj_*mq_+idq_; - const int key = idi_*mp_+idp_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commI_); + const int color = idj_ * mq_ + idq_; + const int key = idi_ * mp_ + idp_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commI_); } // Maximum numbers of elements for various decompositions and dimensions - - dh_ = (nh_+mk_-1)/mk_; - di_ = (ni_+mi_-1)/mi_; - dj_ = (nj_+mj_-1)/mj_; - dk_ = (nk_+mk_-1)/mk_; - dip_ = (di_+mp_-1)/mp_; - djq_ = (dj_+mq_-1)/mq_; - const int mjq = mj_*mq_; - dhq_ = (nh_+mjq-1)/mjq; - const int mip = mi_*mp_; - djp_ = (nj_+mip-1)/mip; + dh_ = (nh_ + mk_ - 1) / mk_; + di_ = (ni_ + mi_ - 1) / mi_; + dj_ = (nj_ + mj_ - 1) / mj_; + dk_ = (nk_ + mk_ - 1) / mk_; + + dip_ = (di_ + mp_ - 1) / mp_; + djq_ = (dj_ + mq_ - 1) / mq_; + const int mjq = mj_ * mq_; + dhq_ = (nh_ + mjq - 1) / mjq; + const int mip = mi_ * mp_; + djp_ = (nj_ + mip - 1) / mip; // Maximum memory needed by work arrays - - const long nMax = std::max( - { long(di_)*long(dj_)*long(dk_), - 
long(mp_)*long(mq_)*long(dip_)*long(djq_)*long(dk_), - long(2)*long(dip_)*long(djq_)*long(mk_)*long(dh_), - long(2)*long(dip_)*long(mp_)*long(djq_)*long(mq_)*long(dh_), - long(2)*long(dip_)*long(djq_)*long(mjq)*long(dhq_), - long(2)*long(dip_)*long(dhq_)*long(mip)*long(djp_), - long(2)*djp_*long(dhq_)*long(mip)*long(dip_) - }); + + const long nMax = + std::max({long(di_) * long(dj_) * long(dk_), long(mp_) * long(mq_) * long(dip_) * long(djq_) * long(dk_), + long(2) * long(dip_) * long(djq_) * long(mk_) * long(dh_), + long(2) * long(dip_) * long(mp_) * long(djq_) * long(mq_) * long(dh_), + long(2) * long(dip_) * long(djq_) * long(mjq) * long(dhq_), + long(2) * long(dip_) * long(dhq_) * long(mip) * long(djp_), + long(2) * djp_ * long(dhq_) * long(mip) * long(dip_)}); assert(nMax <= INT_MAX); - bytes_ = nMax*sizeof(double); + bytes_ = nMax * sizeof(double); // FFT objects - CHECK(cufftPlanMany(&c2ci_,1,&ni_,&ni_,1,ni_,&ni_,1,ni_,CUFFT_Z2Z,djp_*dhq_)); - CHECK(cufftPlanMany(&c2cj_,1,&nj_,&nj_,1,nj_,&nj_,1,nj_,CUFFT_Z2Z,dip_*dhq_)); - CHECK(cufftPlanMany(&c2rk_,1,&nk_,&nh_,1,nh_,&nk_,1,nk_,CUFFT_Z2D,dip_*djq_)); - CHECK(cufftPlanMany(&r2ck_,1,&nk_,&nk_,1,nk_,&nh_,1,nh_,CUFFT_D2Z,dip_*djq_)); + GPU_Error_Check(cufftPlanMany(&c2ci_, 1, &ni_, &ni_, 1, ni_, &ni_, 1, ni_, CUFFT_Z2Z, djp_ * dhq_)); + GPU_Error_Check(cufftPlanMany(&c2cj_, 1, &nj_, &nj_, 1, nj_, &nj_, 1, nj_, CUFFT_Z2Z, dip_ * dhq_)); + GPU_Error_Check(cufftPlanMany(&c2rk_, 1, &nk_, &nh_, 1, nh_, &nk_, 1, nk_, CUFFT_Z2D, dip_ * djq_)); + GPU_Error_Check(cufftPlanMany(&r2ck_, 1, &nk_, &nk_, 1, nk_, &nh_, 1, nh_, CUFFT_D2Z, dip_ * djq_)); -#ifndef MPI_GPU + #ifndef MPI_GPU // Host arrays for MPI communication - CHECK(cudaHostAlloc(&ha_,bytes_+bytes_,cudaHostAllocDefault)); + GPU_Error_Check(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); - hb_ = ha_+nMax; -#endif + hb_ = ha_ + nMax; + #endif } HenryPeriodic::~HenryPeriodic() { -#ifndef MPI_GPU - CHECK(cudaFreeHost(ha_)); + #ifndef 
MPI_GPU + GPU_Error_Check(cudaFreeHost(ha_)); ha_ = hb_ = nullptr; -#endif - CHECK(cufftDestroy(r2ck_)); - CHECK(cufftDestroy(c2rk_)); - CHECK(cufftDestroy(c2cj_)); - CHECK(cufftDestroy(c2ci_)); + #endif + GPU_Error_Check(cufftDestroy(r2ck_)); + GPU_Error_Check(cufftDestroy(c2rk_)); + GPU_Error_Check(cufftDestroy(c2cj_)); + GPU_Error_Check(cufftDestroy(c2ci_)); MPI_Comm_free(&commI_); MPI_Comm_free(&commJ_); MPI_Comm_free(&commK_); diff --git a/src/gravity/paris/HenryPeriodic.hpp b/src/gravity/paris/HenryPeriodic.hpp index ab56fde79..0441d5487 100644 --- a/src/gravity/paris/HenryPeriodic.hpp +++ b/src/gravity/paris/HenryPeriodic.hpp @@ -1,73 +1,83 @@ #pragma once -#include #include +#include + #include "../../utils/gpu.hpp" /** * @brief Generic distributed-memory 3D FFT filter. */ -class HenryPeriodic { - public: - - /** - * @param[in] n[3] { Global number of cells in each dimension, without ghost cells. } - * @param[in] lo[3] { Physical location of the global lower bound of each dimension. } - * @param[in] hi[3] { Physical location of the global upper bound of each dimension, minus one grid cell. - * The one-cell difference is because of the periodic domain. - * See @ref Potential_Paris_3D::Initialize for an example computation of these arguments. } - * @param[in] m[3] { Number of MPI tasks in each dimension. } - * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } - */ - HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); - - ~HenryPeriodic(); - - /** - * @return { Number of bytes needed for array arguments for @ref filter. } - */ - size_t bytes() const { return bytes_; } - - /** - * @detail { Performs a 3D FFT on the real input field, - * applies the provided filter in frequency space, - * and perform the inverse 3D FFT. - * Expects fields in 3D block distribution with no ghost cells. } - * @tparam F { Type of functor that will applied in frequency space. 
- * Should be resolved implicitly by the compiler. } - * @param[in] bytes { Number of bytes allocated for arguments @ref before and @ref after. - * Used to ensure that the arrays have enough extra work space. } - * @param[in,out] before { Input field for filtering. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the original field. } - * @param[out] after { Output field, filtered. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the actual output field. } - * @param[in] f { Functor or lambda function to be used as a filter. - * The operator should have the following prototype. - * \code - * complex f(int i, int j, int k, complex before) - * \endcode - * Arguments `i`, `j`, and `k` are the frequency-space coordinates. - * Argument `before` is the input value at those indices, after the FFT. - * The function should return the filtered value. } - */ - template - void filter(const size_t bytes, double *const before, double *const after, const F f) const; - - private: - int idi_,idj_,idk_; //!< MPI coordinates of 3D block - int mi_,mj_,mk_; //!< Number of MPI tasks in each dimension of 3D domain - int nh_; //!< Global number of complex values in Z dimension, after R2C transform - int ni_,nj_,nk_; //!< Global number of real points in each dimension - int mp_,mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil - int idp_,idq_; //!< X and Y task IDs within Z pencil - MPI_Comm commI_,commJ_,commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils - int dh_,di_,dj_,dk_; //!< Max number of local points in each dimension - int dhq_,dip_,djp_,djq_; //!< Max number of local points in dimensions of 2D decompositions - size_t bytes_; //!< Max bytes needed for argument arrays - cufftHandle c2ci_,c2cj_,c2rk_,r2ck_; //!< Objects for forward and inverse FFTs +class HenryPeriodic +{ + public: + /** + * @param[in] n[3] { Global number of cells in each dimension, without ghost + * cells. 
} + * @param[in] lo[3] { Physical location of the global lower bound of each + * dimension. } + * @param[in] hi[3] { Physical location of the global upper bound of each + * dimension, minus one grid cell. The one-cell difference is because of the + * periodic domain. See @ref PotentialParis3D::Initialize for an example + * computation of these arguments. } + * @param[in] m[3] { Number of MPI tasks in each dimension. } + * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } + */ + HenryPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); + + ~HenryPeriodic(); + + /** + * @return { Number of bytes needed for array arguments for @ref filter. } + */ + size_t bytes() const { return bytes_; } + + /** + * @detail { Performs a 3D FFT on the real input field, + * applies the provided filter in frequency space, + * and perform the inverse 3D FFT. + * Expects fields in 3D block distribution with no ghost cells. } + * @tparam F { Type of functor that will applied in frequency space. + * Should be resolved implicitly by the compiler. } + * @param[in] bytes { Number of bytes allocated for arguments @ref before and + * @ref after. Used to ensure that the arrays have enough extra work space. } + * @param[in,out] before { Input field for filtering. Modified as a work + * array. Must be at least @ref bytes() bytes, likely larger than the original + * field. } + * @param[out] after { Output field, filtered. Modified as a work array. + * Must be at least @ref bytes() bytes, likely larger than + * the actual output field. } + * @param[in] f { Functor or lambda function to be used as a filter. + * The operator should have the following prototype. + * \code + * complex f(int i, int j, int k, complex before) + * \endcode + * Arguments `i`, `j`, and `k` are the frequency-space + * coordinates. Argument `before` is the input value at those indices, after + * the FFT. The function should return the filtered value. 
} + */ + template + void filter(const size_t bytes, double *const before, double *const after, const F f) const; + + private: + int idi_, idj_, idk_; //!< MPI coordinates of 3D block + int mi_, mj_, mk_; //!< Number of MPI tasks in each dimension of 3D domain + int nh_; //!< Global number of complex values in Z dimension, after R2C + //!< transform + int ni_, nj_, nk_; //!< Global number of real points in each dimension + int mp_, mq_; //!< Number of MPI tasks in X and Y dimensions of Z pencil + int idp_, idq_; //!< X and Y task IDs within Z pencil + MPI_Comm commI_, commJ_, + commK_; //!< Communicators of fellow tasks in X, Y, and Z pencils + int dh_, di_, dj_, dk_; //!< Max number of local points in each dimension + int dhq_, dip_, djp_, + djq_; //!< Max number of local points in dimensions of 2D decompositions + size_t bytes_; //!< Max bytes needed for argument arrays + cufftHandle c2ci_, c2cj_, c2rk_, + r2ck_; //!< Objects for forward and inverse FFTs #ifndef MPI_GPU - double *ha_, *hb_; //!< Host copies for MPI messages + double *ha_, *hb_; //!< Host copies for MPI messages #endif }; @@ -79,10 +89,10 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Make sure arguments have enough space assert(bytes >= bytes_); - double *const a = after; - double *const b = before; - cufftDoubleComplex *const ac = reinterpret_cast(a); - cufftDoubleComplex *const bc = reinterpret_cast(b); + double *const a = after; + double *const b = before; + cufftDoubleComplex *const ac = reinterpret_cast(a); + cufftDoubleComplex *const bc = reinterpret_cast(b); // Local copies of member variables for lambda capture @@ -96,323 +106,309 @@ void HenryPeriodic::filter(const size_t bytes, double *const before, double *con // Indices and sizes for pencil redistributions - const int idip = idi*mp+idp; - const int idjq = idj*mq+idq; - const int mip = mi*mp; - const int mjq = mj*mq; + const int idip = idi * mp + idp; + const int idjq = idj * mq + idq; + const int 
mip = mi * mp; + const int mjq = mj * mq; // Reorder 3D block into sub-pencils gpuFor( - mp,mq,dip,djq,dk, - GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { - const int ii = p*dip+i; - const int jj = q*djq+j; - const int ia = k+dk*(j+djq*(i+dip*(q+mq*p))); - const int ib = k+dk*(jj+dj*ii); - a[ia] = b[ib]; - }); + mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { + const int ii = p * dip + i; + const int jj = q * djq + j; + const int ia = k + dk * (j + djq * (i + dip * (q + mq * p))); + const int ib = k + dk * (jj + dj * ii); + a[ia] = b[ib]; + }); // Redistribute into Z pencils - const int countK = dip*djq*dk; -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countK,MPI_DOUBLE,hb_,countK,MPI_DOUBLE,commK_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countK,MPI_DOUBLE,b,countK,MPI_DOUBLE,commK_); -#endif + const int countK = dip * djq * dk; + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, MPI_DOUBLE, commK_); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_); + #endif // Make Z pencils contiguous in Z { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj*dj+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj * dj + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - iHi-iLo,jHi-jLo,mk,dk, - GPU_LAMBDA(const int i, const int j, const int pq, const int k) { - const int kk = pq*dk+k; - if (kk < nk) { - const int ia = 
kk+nk*(j+djq*i); - const int ib = k+dk*(j+djq*(i+dip*pq)); - a[ia] = b[ib]; - } - }); + iHi - iLo, jHi - jLo, mk, dk, GPU_LAMBDA(const int i, const int j, const int pq, const int k) { + const int kk = pq * dk + k; + if (kk < nk) { + const int ia = kk + nk * (j + djq * i); + const int ib = k + dk * (j + djq * (i + dip * pq)); + a[ia] = b[ib]; + } + }); } // Real-to-complex FFT in Z - CHECK(cufftExecD2Z(r2ck_,a,bc)); + GPU_Error_Check(cufftExecD2Z(r2ck_, a, bc)); // Rearrange for Y redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj_*dj_+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj_ * dj_ + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - mjq,iHi-iLo,jHi-jLo,dhq, - GPU_LAMBDA(const int q, const int i, const int j, const int k) { - const int kk = q*dhq+k; - if (kk < nh) { - const int ia = k+dhq*(j+djq*(i+dip*q)); - const int ib = kk+nh*(j+djq*i); - ac[ia] = bc[ib]; - } - }); + mjq, iHi - iLo, jHi - jLo, dhq, GPU_LAMBDA(const int q, const int i, const int j, const int k) { + const int kk = q * dhq + k; + if (kk < nh) { + const int ia = k + dhq * (j + djq * (i + dip * q)); + const int ib = kk + nh * (j + djq * i); + ac[ia] = bc[ib]; + } + }); } // Redistribute for Y pencils - const int countJ = 2*dip*djq*dhq; -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countJ,MPI_DOUBLE,hb_,countJ,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countJ,MPI_DOUBLE,b,countJ,MPI_DOUBLE,commJ_); -#endif + const int countJ = 2 * dip * djq * dhq; + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_); + 
GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_); + #endif // Make Y pencils contiguous in Y { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - kHi-kLo,iHi-iLo,mj,mq,djq, - GPU_LAMBDA(const int k, const int i, const int r, const int q, const int j) { - const int rdj = r*dj; - const int jj = rdj+q*djq+j; - if ((jj < nj) && (jj < rdj+dj)) { - const int ia = jj+nj*(i+dip*k); - const int ib = k+dhq*(j+djq*(i+dip*(q+mq*r))); - ac[ia] = bc[ib]; - } - }); + kHi - kLo, iHi - iLo, mj, mq, djq, GPU_LAMBDA(const int k, const int i, const int r, const int q, const int j) { + const int rdj = r * dj; + const int jj = rdj + q * djq + j; + if ((jj < nj) && (jj < rdj + dj)) { + const int ia = jj + nj * (i + dip * k); + const int ib = k + dhq * (j + djq * (i + dip * (q + mq * r))); + ac[ia] = bc[ib]; + } + }); } // Forward FFT in Y - CHECK(cufftExecZ2Z(c2cj_,ac,bc,CUFFT_FORWARD)); + GPU_Error_Check(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_FORWARD)); // Rearrange for X redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - mip,kHi-kLo,iHi-iLo,djp, - GPU_LAMBDA(const int p, const int k, const int i, const int j) { - const int jj = p*djp+j; - if (jj < nj) { - const int ia = j+djp*(i+dip*(k+dhq*p)); - const int ib = jj+nj*(i+dip*k); - ac[ia] = bc[ib]; - } - }); + mip, kHi - 
kLo, iHi - iLo, djp, GPU_LAMBDA(const int p, const int k, const int i, const int j) { + const int jj = p * djp + j; + if (jj < nj) { + const int ia = j + djp * (i + dip * (k + dhq * p)); + const int ib = jj + nj * (i + dip * k); + ac[ia] = bc[ib]; + } + }); } // Redistribute for X pencils - const int countI = 2*dip*djp*dhq; -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countI,MPI_DOUBLE,hb_,countI,MPI_DOUBLE,commI_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countI,MPI_DOUBLE,b,countI,MPI_DOUBLE,commI_); -#endif + const int countI = 2 * dip * djp * dhq; + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_); + #endif // Make X pencils contiguous in X { - const int jLo = idip*djp; - const int jHi = std::min(jLo+djp,nj); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int jLo = idip * djp; + const int jHi = std::min(jLo + djp, nj); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - jHi-jLo,kHi-kLo,mi,mp,dip, - GPU_LAMBDA(const int j, const int k, const int r, const int p, const int i) { - const int rdi = r*di; - const int ii = rdi+p*dip+i; - if ((ii < ni) && (ii < rdi+di)) { - const int ia = ii+ni*(k+dhq*j); - const int ib = j+djp*(i+dip*(k+dhq*(p+mp*r))); - ac[ia] = bc[ib]; - } - }); + jHi - jLo, kHi - kLo, mi, mp, dip, GPU_LAMBDA(const int j, const int k, const int r, const int p, const int i) { + const int rdi = r * di; + const int ii = rdi + p * dip + i; + if ((ii < ni) && (ii < rdi + di)) { + const int ia = ii + ni * (k + dhq * j); + const int ib = j + djp * (i + dip * (k + dhq * (p + mp * 
r))); + ac[ia] = bc[ib]; + } + }); } // Forward FFT in X - CHECK(cufftExecZ2Z(c2ci_,ac,bc,CUFFT_FORWARD)); + GPU_Error_Check(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_FORWARD)); // Apply filter in frequency space distributed in X pencils - const int jLo = idip*djp; - const int jHi = std::min(jLo+djp,nj); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int jLo = idip * djp; + const int jHi = std::min(jLo + djp, nj); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - jHi-jLo,kHi-kLo,ni, - GPU_LAMBDA(const int j0, const int k0, const int i) { - const int j = jLo+j0; - const int k = kLo+k0; - const int iab = i+ni*(k0+dhq*j0); - ac[iab] = f(i,j,k,bc[iab]); - }); + jHi - jLo, kHi - kLo, ni, GPU_LAMBDA(const int j0, const int k0, const int i) { + const int j = jLo + j0; + const int k = kLo + k0; + const int iab = i + ni * (k0 + dhq * j0); + ac[iab] = f(i, j, k, bc[iab]); + }); // Backward FFT in X - CHECK(cufftExecZ2Z(c2ci_,ac,bc,CUFFT_INVERSE)); + GPU_Error_Check(cufftExecZ2Z(c2ci_, ac, bc, CUFFT_INVERSE)); // Rearrange for Y redistribution { - const int jLo = idip*djp; - const int jHi = std::min(jLo+djp,nj); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int jLo = idip * djp; + const int jHi = std::min(jLo + djp, nj); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - mi,mp,jHi-jLo,kHi-kLo,dip, - GPU_LAMBDA(const int r, const int p, const int j, const int k, const int i) { - const int rdi = r*di; - const int ii = rdi+p*dip+i; - if ((ii < ni) && (ii < rdi+di)) { - const int ia = i+dip*(k+dhq*(j+djp*(p+mp*r))); - const int ib = ii+ni*(k+dhq*j); - ac[ia] = bc[ib]; - } - }); + mi, mp, jHi - jLo, kHi - kLo, dip, GPU_LAMBDA(const int r, const int p, const int j, const int k, const int i) { + const int rdi = r * di; + const int ii = rdi + p * dip + i; + if ((ii < ni) && (ii < rdi + di)) { + const int ia = i + dip * (k + dhq * (j + djp * (p + mp * r))); + const int 
ib = ii + ni * (k + dhq * j); + ac[ia] = bc[ib]; + } + }); } // Redistribute for Y pencils -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countI,MPI_DOUBLE,hb_,countI,MPI_DOUBLE,commI_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countI,MPI_DOUBLE,b,countI,MPI_DOUBLE,commI_); -#endif + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countI, MPI_DOUBLE, hb_, countI, MPI_DOUBLE, commI_); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(a, countI, MPI_DOUBLE, b, countI, MPI_DOUBLE, commI_); + #endif // Make Y pencils contiguous in Y { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - kHi-kLo,iHi-iLo,mip,djp, - GPU_LAMBDA(const int k, const int i, const int p, const int j) { - const int jj = p*djp+j; - if (jj < nj) { - const int ia = jj+nj*(i+dip*k); - const int ib = i+dip*(k+dhq*(j+djp*p)); - ac[ia] = bc[ib]; - } - }); + kHi - kLo, iHi - iLo, mip, djp, GPU_LAMBDA(const int k, const int i, const int p, const int j) { + const int jj = p * djp + j; + if (jj < nj) { + const int ia = jj + nj * (i + dip * k); + const int ib = i + dip * (k + dhq * (j + djp * p)); + ac[ia] = bc[ib]; + } + }); } // Backward FFT in Y - CHECK(cufftExecZ2Z(c2cj_,ac,bc,CUFFT_INVERSE)); + GPU_Error_Check(cufftExecZ2Z(c2cj_, ac, bc, CUFFT_INVERSE)); // Rearrange for Z redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int kLo = idjq*dhq; - const int kHi = std::min(kLo+dhq,nh); + const int iLo = 
idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int kLo = idjq * dhq; + const int kHi = std::min(kLo + dhq, nh); gpuFor( - mj,mq,kHi-kLo,iHi-iLo,djq, - GPU_LAMBDA(const int r, const int q, const int k, const int i, const int j) { - const int rdj = r*dj; - const int jj = rdj+q*djq+j; - if ((jj < nj) && (jj < rdj+dj)) { - const int ia = j+djq*(i+dip*(k+dhq*(q+mq*r))); - const int ib = jj+nj*(i+dip*k); - ac[ia] = bc[ib]; - } - }); + mj, mq, kHi - kLo, iHi - iLo, djq, GPU_LAMBDA(const int r, const int q, const int k, const int i, const int j) { + const int rdj = r * dj; + const int jj = rdj + q * djq + j; + if ((jj < nj) && (jj < rdj + dj)) { + const int ia = j + djq * (i + dip * (k + dhq * (q + mq * r))); + const int ib = jj + nj * (i + dip * k); + ac[ia] = bc[ib]; + } + }); } // Redistribute in Z pencils -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countJ,MPI_DOUBLE,hb_,countJ,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countJ,MPI_DOUBLE,b,countJ,MPI_DOUBLE,commJ_); -#endif + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countJ, MPI_DOUBLE, hb_, countJ, MPI_DOUBLE, commJ_); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(a, countJ, MPI_DOUBLE, b, countJ, MPI_DOUBLE, commJ_); + #endif // Make Z pencils contiguous in Z { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj*dj+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj * dj + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - iHi-iLo,jHi-jLo,mjq,dhq, - GPU_LAMBDA(const 
int i, const int j, const int q, const int k) { - const int kk = q*dhq+k; - if (kk < nh) { - const int ia = kk+nh*(j+djq*i); - const int ib = j+djq*(i+dip*(k+dhq*q)); - ac[ia] = bc[ib]; - } - }); + iHi - iLo, jHi - jLo, mjq, dhq, GPU_LAMBDA(const int i, const int j, const int q, const int k) { + const int kk = q * dhq + k; + if (kk < nh) { + const int ia = kk + nh * (j + djq * i); + const int ib = j + djq * (i + dip * (k + dhq * q)); + ac[ia] = bc[ib]; + } + }); } // Complex-to-real FFT in Z - CHECK(cufftExecZ2D(c2rk_,ac,b)); + GPU_Error_Check(cufftExecZ2D(c2rk_, ac, b)); // Rearrange for 3D-block redistribution { - const int iLo = idi*di+idp*dip; - const int iHi = std::min({iLo+dip,(idi+1)*di,ni}); - const int jLo = idj*dj+idq*djq; - const int jHi = std::min({jLo+djq,(idj+1)*dj,nj}); + const int iLo = idi * di + idp * dip; + const int iHi = std::min({iLo + dip, (idi + 1) * di, ni}); + const int jLo = idj * dj + idq * djq; + const int jHi = std::min({jLo + djq, (idj + 1) * dj, nj}); gpuFor( - mk,iHi-iLo,jHi-jLo,dk, - GPU_LAMBDA(const int pq, const int i, const int j, const int k) { - const int kk = pq*dk+k; - if (kk < nk) { - const int ia = k+dk*(j+djq*(i+dip*pq)); - const int ib = kk+nk*(j+djq*i); - a[ia] = b[ib]; - } - }); + mk, iHi - iLo, jHi - jLo, dk, GPU_LAMBDA(const int pq, const int i, const int j, const int k) { + const int kk = pq * dk + k; + if (kk < nk) { + const int ia = k + dk * (j + djq * (i + dip * pq)); + const int ib = kk + nk * (j + djq * i); + a[ia] = b[ib]; + } + }); } // Redistribute for 3D blocks -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,a,bytes,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,countK,MPI_DOUBLE,hb_,countK,MPI_DOUBLE,commK_); - CHECK(cudaMemcpy(b,hb_,bytes,cudaMemcpyHostToDevice)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(a,countK,MPI_DOUBLE,b,countK,MPI_DOUBLE,commK_); -#endif + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, a, bytes, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, countK, MPI_DOUBLE, hb_, countK, 
MPI_DOUBLE, commK_); + GPU_Error_Check(cudaMemcpy(b, hb_, bytes, cudaMemcpyHostToDevice)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(a, countK, MPI_DOUBLE, b, countK, MPI_DOUBLE, commK_); + #endif // Rearrange into 3D blocks and apply FFT normalization { - const double divN = 1.0/(double(ni)*double(nj)*double(nk)); - const int kLo = idk*dk; - const int kHi = std::min(kLo+dk,nk); + const double divN = 1.0 / (double(ni) * double(nj) * double(nk)); + const int kLo = idk * dk; + const int kHi = std::min(kLo + dk, nk); gpuFor( - mp,dip,mq,djq,kHi-kLo, - GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { - const int ii = p*dip+i; - const int jj = q*djq+j; - if ((ii < di) && (jj < dj)) { - const int ia = k+dk*(jj+dj*ii); - const int ib = k+dk*(j+djq*(i+dip*(q+mq*p))); - a[ia] = divN*b[ib]; - } - }); + mp, dip, mq, djq, kHi - kLo, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { + const int ii = p * dip + i; + const int jj = q * djq + j; + if ((ii < di) && (jj < dj)) { + const int ia = k + dk * (jj + dj * ii); + const int ib = k + dk * (j + djq * (i + dip * (q + mq * p))); + a[ia] = divN * b[ib]; + } + }); } } #endif - diff --git a/src/gravity/paris/ParisPeriodic.cu b/src/gravity/paris/ParisPeriodic.cu index 671b42aef..0b2e5ef5a 100644 --- a/src/gravity/paris/ParisPeriodic.cu +++ b/src/gravity/paris/ParisPeriodic.cu @@ -1,31 +1,32 @@ #ifdef PARIS -#include "ParisPeriodic.hpp" + #include -#include + #include "ParisPeriodic.hpp" -__host__ __device__ static inline double sqr(const double x) { return x*x; } +__host__ __device__ static inline double Sqr(const double x) { return x * x; } -ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]): - ni_(n[0]), - nj_(n[1]), -#ifdef PARIS_3PT - nk_(n[2]), - ddi_(2.0*double(n[0]-1)/(hi[0]-lo[0])), - ddj_(2.0*double(n[1]-1)/(hi[1]-lo[1])), - ddk_(2.0*double(n[2]-1)/(hi[2]-lo[2])), -#elif defined 
PARIS_5PT - nk_(n[2]), - ddi_(sqr(double(n[0]-1)/(hi[0]-lo[0]))/6.0), - ddj_(sqr(double(n[1]-1)/(hi[1]-lo[1]))/6.0), - ddk_(sqr(double(n[2]-1)/(hi[2]-lo[2]))/6.0), -#else - ddi_{2.0*M_PI*double(n[0]-1)/(double(n[0])*(hi[0]-lo[0]))}, - ddj_{2.0*M_PI*double(n[1]-1)/(double(n[1])*(hi[1]-lo[1]))}, - ddk_{2.0*M_PI*double(n[2]-1)/(double(n[2])*(hi[2]-lo[2]))}, -#endif - henry(n,lo,hi,m,id) -{ } +ParisPeriodic::ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]) + : ni_(n[0]), + nj_(n[1]), + #ifdef PARIS_3PT + nk_(n[2]), + ddi_(2.0 * double(n[0] - 1) / (hi[0] - lo[0])), + ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])), + ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])), + #elif defined PARIS_5PT + nk_(n[2]), + ddi_(Sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), + ddj_(Sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), + ddk_(Sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), + #else + ddi_{2.0 * M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))}, + ddj_{2.0 * M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))}, + ddk_{2.0 * M_PI * double(n[2] - 1) / (double(n[2]) * (hi[2] - lo[2]))}, + #endif + henry(n, lo, hi, m, id) +{ +} void ParisPeriodic::solve(const size_t bytes, double *const density, double *const potential) const { @@ -34,44 +35,44 @@ void ParisPeriodic::solve(const size_t bytes, double *const density, double *con const double ddi = ddi_, ddj = ddj_, ddk = ddk_; // Poisson-solve constants that depend on divergence-operator approximation -#ifdef PARIS_3PT - const int nk = nk_; - const double si = M_PI/double(ni); - const double sj = M_PI/double(nj); - const double sk = M_PI/double(nk); -#elif defined PARIS_5PT - const int nk = nk_; - const double si = 2.0*M_PI/double(ni); - const double sj = 2.0*M_PI/double(nj); - const double sk = 2.0*M_PI/double(nk); -#endif + #ifdef PARIS_3PT + const int nk = nk_; + const double si = M_PI / double(ni); + const double sj = M_PI / double(nj); + const double sk = M_PI / 
double(nk); + #elif defined PARIS_5PT + const int nk = nk_; + const double si = 2.0 * M_PI / double(ni); + const double sj = 2.0 * M_PI / double(nj); + const double sk = 2.0 * M_PI / double(nk); + #endif // Provide FFT filter with a lambda that does Poisson solve in frequency space - henry.filter(bytes,density,potential, - [=] __device__ (const int i, const int j, const int k, const cufftDoubleComplex b) { - if (i || j || k) { -#ifdef PARIS_3PT - const double i2 = sqr(sin(double(min(i,ni-i))*si)*ddi); - const double j2 = sqr(sin(double(min(j,nj-j))*sj)*ddj); - const double k2 = sqr(sin(double(k)*sk)*ddk); -#elif defined PARIS_5PT - const double ci = cos(double(min(i,ni-i))*si); - const double cj = cos(double(min(j,nj-j))*sj); - const double ck = cos(double(k)*sk); - const double i2 = ddi*(2.0*ci*ci-16.0*ci+14.0); - const double j2 = ddj*(2.0*cj*cj-16.0*cj+14.0); - const double k2 = ddk*(2.0*ck*ck-16.0*ck+14.0); -#else - const double i2 = sqr(double(min(i,ni-i))*ddi); - const double j2 = sqr(double(min(j,nj-j))*ddj); - const double k2 = sqr(double(k)*ddk); -#endif - const double d = -1.0/(i2+j2+k2); - return cufftDoubleComplex{d*b.x,d*b.y}; - } else { - return cufftDoubleComplex{0.0,0.0}; - } - }); + henry.filter(bytes, density, potential, + [=] __device__(const int i, const int j, const int k, const cufftDoubleComplex b) { + if (i || j || k) { + #ifdef PARIS_3PT + const double i2 = Sqr(sin(double(min(i, ni - i)) * si) * ddi); + const double j2 = Sqr(sin(double(min(j, nj - j)) * sj) * ddj); + const double k2 = Sqr(sin(double(k) * sk) * ddk); + #elif defined PARIS_5PT + const double ci = cos(double(min(i, ni - i)) * si); + const double cj = cos(double(min(j, nj - j)) * sj); + const double ck = cos(double(k) * sk); + const double i2 = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0); + const double j2 = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); + const double k2 = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0); + #else + const double i2 = Sqr(double(min(i, ni - i)) * ddi); + const 
double j2 = Sqr(double(min(j, nj - j)) * ddj); + const double k2 = Sqr(double(k) * ddk); + #endif + const double d = -1.0 / (i2 + j2 + k2); + return cufftDoubleComplex{d * b.x, d * b.y}; + } else { + return cufftDoubleComplex{0.0, 0.0}; + } + }); } #endif diff --git a/src/gravity/paris/ParisPeriodic.hpp b/src/gravity/paris/ParisPeriodic.hpp index 92b07becd..8069cde65 100644 --- a/src/gravity/paris/ParisPeriodic.hpp +++ b/src/gravity/paris/ParisPeriodic.hpp @@ -5,44 +5,49 @@ /** * @brief Periodic Poisson solver using @ref Henry FFT filter. */ -class ParisPeriodic { - public: +class ParisPeriodic +{ + public: + /** + * @param[in] n[3] { Global number of cells in each dimension, without ghost + * cells. } + * @param[in] lo[3] { Physical location of the global lower bound of each + * dimension. } + * @param[in] hi[3] { Physical location of the global upper bound of each + * dimension, minus one grid cell. The one-cell difference is because of the + * periodic domain. See @ref PotentialParis3D::Initialize for an example + * computation of these arguments. } + * @param[in] m[3] { Number of MPI tasks in each dimension. } + * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. } + */ + ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); - /** - * @param[in] n[3] { Global number of cells in each dimension, without ghost cells. } - * @param[in] lo[3] { Physical location of the global lower bound of each dimension. } - * @param[in] hi[3] { Physical location of the global upper bound of each dimension, minus one grid cell. - * The one-cell difference is because of the periodic domain. - * See @ref Potential_Paris_3D::Initialize for an example computation of these arguments. } - * @param[in] m[3] { Number of MPI tasks in each dimension. } - * @param[in] id[3] { Coordinates of this MPI task, starting at `{0,0,0}`. 
} - */ - ParisPeriodic(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); + /** + * @return { Number of bytes needed for array arguments for @ref solve. } + */ + size_t bytes() const { return henry.bytes(); } - /** - * @return { Number of bytes needed for array arguments for @ref solve. } - */ - size_t bytes() const { return henry.bytes(); } + /** + * @detail { Solves the Poisson equation for the potential derived from the + * provided density. Assumes periodic boundary conditions. Assumes fields have + * no ghost cells. Uses a 3D FFT provided by the @ref Henry class. } + * @param[in] bytes { Number of bytes allocated for arguments @ref density and + * @ref potential. Used to ensure that the arrays have enough extra work + * space. } + * @param[in,out] density { Input density field. Modified as a work array. + * Must be at least @ref bytes() bytes, likely larger + * than the original field. } + * @param[out] potential { Output potential. Modified as a work array. + * Must be at least @ref bytes() bytes, likely larger + * than the actual output field. } + */ + void solve(size_t bytes, double *density, double *potential) const; - /** - * @detail { Solves the Poisson equation for the potential derived from the provided density. - * Assumes periodic boundary conditions. - * Assumes fields have no ghost cells. - * Uses a 3D FFT provided by the @ref Henry class. } - * @param[in] bytes { Number of bytes allocated for arguments @ref density and @ref potential. - * Used to ensure that the arrays have enough extra work space. } - * @param[in,out] density { Input density field. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the original field. } - * @param[out] potential { Output potential. Modified as a work array. - * Must be at least @ref bytes() bytes, likely larger than the actual output field. 
} - */ - void solve(size_t bytes, double *density, double *potential) const; - - private: - int ni_,nj_; //!< Number of elements in X and Y dimensions + private: + int ni_, nj_; //!< Number of elements in X and Y dimensions #if defined(PARIS_3PT) || defined(PARIS_5PT) - int nk_; //!< Number of elements in Z dimension + int nk_; //!< Number of elements in Z dimension #endif - double ddi_,ddj_,ddk_; //!< Frequency-independent terms in Poisson solve - HenryPeriodic henry; //!< FFT filter object + double ddi_, ddj_, ddk_; //!< Frequency-independent terms in Poisson solve + HenryPeriodic henry; //!< FFT filter object }; diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.cu b/src/gravity/paris/PoissonZero3DBlockedGPU.cu index 29093e2a3..84e070160 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.cu +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.cu @@ -1,115 +1,123 @@ #ifdef PARIS_GALACTIC -#include "PoissonZero3DBlockedGPU.hpp" + #include + #include + #include + #include + #include -#include -#include -#include -#include -#include + #include "PoissonZero3DBlockedGPU.hpp" static constexpr double sqrt2 = 0.4142135623730950488016887242096980785696718753769480731766797379; -static inline __host__ __device__ double sqr(const double x) { return x*x; } +static inline __host__ __device__ double Sqr(const double x) { return x * x; } -PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]): -#ifdef PARIS_GALACTIC_3PT - ddi_(2.0*double(n[0]-1)/(hi[0]-lo[0])), - ddj_(2.0*double(n[1]-1)/(hi[1]-lo[1])), - ddk_(2.0*double(n[2]-1)/(hi[2]-lo[2])), -#elif defined PARIS_GALACTIC_5PT - ddi_(sqr(double(n[0]-1)/(hi[0]-lo[0]))/6.0), - ddj_(sqr(double(n[1]-1)/(hi[1]-lo[1]))/6.0), - ddk_(sqr(double(n[2]-1)/(hi[2]-lo[2]))/6.0), -#else - ddi_{M_PI*double(n[0]-1)/(double(n[0])*(hi[0]-lo[0]))}, - ddj_{M_PI*double(n[1]-1)/(double(n[1])*(hi[1]-lo[1]))}, - ddk_{M_PI*double(n[2]-1)/(double(n[2])*(hi[2]-lo[2]))}, 
-#endif - idi_(id[0]), - idj_(id[1]), - idk_(id[2]), - mi_(m[0]), - mj_(m[1]), - mk_(m[2]), - ni_(n[0]), - nj_(n[1]), - nk_(n[2]) +PoissonZero3DBlockedGPU::PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], + const int id[3]) + : + #ifdef PARIS_GALACTIC_3PT + ddi_(2.0 * double(n[0] - 1) / (hi[0] - lo[0])), + ddj_(2.0 * double(n[1] - 1) / (hi[1] - lo[1])), + ddk_(2.0 * double(n[2] - 1) / (hi[2] - lo[2])), + #elif defined PARIS_GALACTIC_5PT + ddi_(Sqr(double(n[0] - 1) / (hi[0] - lo[0])) / 6.0), + ddj_(Sqr(double(n[1] - 1) / (hi[1] - lo[1])) / 6.0), + ddk_(Sqr(double(n[2] - 1) / (hi[2] - lo[2])) / 6.0), + #else + ddi_{M_PI * double(n[0] - 1) / (double(n[0]) * (hi[0] - lo[0]))}, + ddj_{M_PI * double(n[1] - 1) / (double(n[1]) * (hi[1] - lo[1]))}, + ddk_{M_PI * double(n[2] - 1) / (double(n[2]) * (hi[2] - lo[2]))}, + #endif + idi_(id[0]), + idj_(id[1]), + idk_(id[2]), + mi_(m[0]), + mj_(m[1]), + mk_(m[2]), + ni_(n[0]), + nj_(n[1]), + nk_(n[2]) { - mq_ = int(round(sqrt(mk_))); - while (mk_%mq_) mq_--; - mp_ = mk_/mq_; - assert(mp_*mq_ == mk_); + mq_ = int(round(sqrt(mk_))); + while (mk_ % mq_) { + mq_--; + } + mp_ = mk_ / mq_; + assert(mp_ * mq_ == mk_); - idp_ = idk_/mq_; - idq_ = idk_%mq_; + idp_ = idk_ / mq_; + idq_ = idk_ % mq_; { - const int color = idi_*mj_+idj_; - const int key = idk_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commK_); + const int color = idi_ * mj_ + idj_; + const int key = idk_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commK_); } { - const int color = idi_*mp_+idp_; - const int key = idj_*mq_+idq_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commJ_); + const int color = idi_ * mp_ + idp_; + const int key = idj_ * mq_ + idq_; + MPI_Comm_split(MPI_COMM_WORLD, color, key, &commJ_); } { - const int color = idj_*mq_+idq_; - const int key = idi_*mp_+idp_; - MPI_Comm_split(MPI_COMM_WORLD,color,key,&commI_); + const int color = idj_ * mq_ + idq_; + const int key = idi_ * mp_ + idp_; +
MPI_Comm_split(MPI_COMM_WORLD, color, key, &commI_); } - di_ = (ni_+mi_-1)/mi_; - dj_ = (nj_+mj_-1)/mj_; - dk_ = (nk_+mk_-1)/mk_; + di_ = (ni_ + mi_ - 1) / mi_; + dj_ = (nj_ + mj_ - 1) / mj_; + dk_ = (nk_ + mk_ - 1) / mk_; - dip_ = (di_+mp_-1)/mp_; - djq_ = (dj_+mq_-1)/mq_; - const int mjq = mj_*mq_; - dkq_ = (nk_+mjq-1)/mjq; - const int mip = mi_*mp_; - djp_ = (nj_+mip-1)/mip; + dip_ = (di_ + mp_ - 1) / mp_; + djq_ = (dj_ + mq_ - 1) / mq_; + const int mjq = mj_ * mq_; + dkq_ = (nk_ + mjq - 1) / mjq; + const int mip = mi_ * mp_; + djp_ = (nj_ + mip - 1) / mip; - ni2_ = 2*(ni_/2+1); - nj2_ = 2*(nj_/2+1); - nk2_ = 2*(nk_/2+1); + ni2_ = 2 * (ni_ / 2 + 1); + nj2_ = 2 * (nj_ / 2 + 1); + nk2_ = 2 * (nk_ / 2 + 1); - const long nMax = std::max({di_*dj_*dk_,dip_*djq_*mk_*dk_,dip_*mp_*djq_*mq_*dk_,dip_*djq_*nk2_,dip_*djq_*mjq*dkq_,dip_*dkq_*nj2_,dip_*dkq_*mip*djp_,dkq_*djp_*mip*dip_,dkq_*djp_*ni2_}); - bytes_ = nMax*sizeof(double); + const long nMax = std::max({di_ * dj_ * dk_, dip_ * djq_ * mk_ * dk_, dip_ * mp_ * djq_ * mq_ * dk_, + dip_ * djq_ * nk2_, dip_ * djq_ * mjq * dkq_, dip_ * dkq_ * nj2_, + dip_ * dkq_ * mip * djp_, dkq_ * djp_ * mip * dip_, dkq_ * djp_ * ni2_}); + bytes_ = nMax * sizeof(double); - int nkh = nk_/2+1; - CHECK(cufftPlanMany(&d2zk_,1,&nk_,&nk_,1,nk_,&nkh,1,nkh,CUFFT_D2Z,dip_*djq_)); - int njh = nj_/2+1; - CHECK(cufftPlanMany(&d2zj_,1,&nj_,&nj_,1,nj_,&njh,1,njh,CUFFT_D2Z,dip_*dkq_)); - int nih = ni_/2+1; - CHECK(cufftPlanMany(&d2zi_,1,&ni_,&ni_,1,ni_,&nih,1,nih,CUFFT_D2Z,dkq_*djp_)); -#ifndef MPI_GPU - CHECK(cudaHostAlloc(&ha_,bytes_+bytes_,cudaHostAllocDefault)); + int nkh = nk_ / 2 + 1; + GPU_Error_Check(cufftPlanMany(&d2zk_, 1, &nk_, &nk_, 1, nk_, &nkh, 1, nkh, CUFFT_D2Z, dip_ * djq_)); + int njh = nj_ / 2 + 1; + GPU_Error_Check(cufftPlanMany(&d2zj_, 1, &nj_, &nj_, 1, nj_, &njh, 1, njh, CUFFT_D2Z, dip_ * dkq_)); + int nih = ni_ / 2 + 1; + GPU_Error_Check(cufftPlanMany(&d2zi_, 1, &ni_, &ni_, 1, ni_, &nih, 1, nih, CUFFT_D2Z, dkq_ * djp_)); + #ifndef 
MPI_GPU + GPU_Error_Check(cudaHostAlloc(&ha_, bytes_ + bytes_, cudaHostAllocDefault)); assert(ha_); - hb_ = ha_+nMax; -#endif + hb_ = ha_ + nMax; + #endif } PoissonZero3DBlockedGPU::~PoissonZero3DBlockedGPU() { -#ifndef MPI_GPU - CHECK(cudaFreeHost(ha_)); + #ifndef MPI_GPU + GPU_Error_Check(cudaFreeHost(ha_)); ha_ = hb_ = nullptr; -#endif - CHECK(cufftDestroy(d2zi_)); - CHECK(cufftDestroy(d2zj_)); - CHECK(cufftDestroy(d2zk_)); + #endif + GPU_Error_Check(cufftDestroy(d2zi_)); + GPU_Error_Check(cufftDestroy(d2zj_)); + GPU_Error_Check(cufftDestroy(d2zk_)); MPI_Comm_free(&commI_); MPI_Comm_free(&commJ_); MPI_Comm_free(&commK_); } -void print(const char *const title, const int ni, const int nj, const int nk, const double *const v) +void Print(const char *const title, const int ni, const int nj, const int nk, const double *const v) { - printf("%s:\n",title); + printf("%s:\n", title); for (int i = 0; i < ni; i++) { for (int j = 0; j < nj; j++) { - for (int k = 0; k < nk; k++) printf("%.6f ",v[(i*nj+j)*nk+k]); + for (int k = 0; k < nk; k++) { + printf("%.6f ", v[(i * nj + j) * nk + k]); + } printf(" "); } printf("\n"); @@ -125,400 +133,392 @@ void PoissonZero3DBlockedGPU::solve(const long bytes, double *const density, dou double *const ua = potential; double *const ub = density; - cufftDoubleComplex *const uc = reinterpret_cast(ub); + auto *const uc = reinterpret_cast(ub); const double ddi = ddi_; const double ddj = ddj_; const double ddk = ddk_; - const int di = di_; - const int dip = dip_; - const int dj = dj_; - const int djp = djp_; - const int djq = djq_; - const int dk = dk_; - const int dkq = dkq_; - const int idi = idi_; - const int idj = idj_; - const int idp = idp_; - const int idq = idq_; - const int mp = mp_; - const int mq = mq_; - const int ni = ni_; - const int ni2 = ni2_; - const int nj = nj_; - const int nj2 = nj2_; - const int nk = nk_; - const int nk2 = nk2_; + const int di = di_; + const int dip = dip_; + const int dj = dj_; + const int djp = djp_; + 
const int djq = djq_; + const int dk = dk_; + const int dkq = dkq_; + const int idi = idi_; + const int idj = idj_; + const int idp = idp_; + const int idq = idq_; + const int mp = mp_; + const int mq = mq_; + const int ni = ni_; + const int ni2 = ni2_; + const int nj = nj_; + const int nj2 = nj2_; + const int nk = nk_; + const int nk2 = nk2_; gpuFor( - mp,mq,dip,djq,dk, - GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { - const int iLo = p*dip; - const int jLo = q*djq; - if ((i+iLo < di) && (j+jLo < dj)) ua[(((p*mq+q)*dip+i)*djq+j)*dk+k] = ub[((i+iLo)*dj+j+jLo)*dk+k]; - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*djq*dk,MPI_DOUBLE,hb_,dip*djq*dk,MPI_DOUBLE,commK_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*djq*dk,MPI_DOUBLE,ub,dip*djq*dk,MPI_DOUBLE,commK_); -#endif + mp, mq, dip, djq, dk, GPU_LAMBDA(const int p, const int q, const int i, const int j, const int k) { + const int iLo = p * dip; + const int jLo = q * djq; + if ((i + iLo < di) && (j + jLo < dj)) { + ua[(((p * mq + q) * dip + i) * djq + j) * dk + k] = ub[((i + iLo) * dj + j + jLo) * dk + k]; + } + }); + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_); + #endif gpuFor( - dip,djq,nk/2+1, - GPU_LAMBDA(const int i, const int j, const int k) { - const int ij = (i*djq+j)*nk; - const int kk = k+k; - if (k == 0) { - ua[ij] = ub[(i*djq+j)*dk]; - } else if (kk == nk) { - const int pq = (nk-1)/dk; - const int kpq = (nk-1)%dk; - ua[ij+k] = -ub[((pq*dip+i)*djq+j)*dk+kpq]; - } else { - const int pqa = 
(kk-1)/dk; - const int kka = (kk-1)%dk; - ua[ij+(nk-k)] = -ub[((pqa*dip+i)*djq+j)*dk+kka]; - const int pqb = kk/dk; - const int kkb = kk%dk; - ua[ij+k] = ub[((pqb*dip+i)*djq+j)*dk+kkb]; - } - }); - CHECK(cufftExecD2Z(d2zk_,ua,uc)); + dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { + const int ij = (i * djq + j) * nk; + const int kk = k + k; + if (k == 0) { + ua[ij] = ub[(i * djq + j) * dk]; + } else if (kk == nk) { + const int pq = (nk - 1) / dk; + const int kpq = (nk - 1) % dk; + ua[ij + k] = -ub[((pq * dip + i) * djq + j) * dk + kpq]; + } else { + const int pqa = (kk - 1) / dk; + const int kka = (kk - 1) % dk; + ua[ij + (nk - k)] = -ub[((pqa * dip + i) * djq + j) * dk + kka]; + const int pqb = kk / dk; + const int kkb = kk % dk; + ua[ij + k] = ub[((pqb * dip + i) * djq + j) * dk + kkb]; + } + }); + GPU_Error_Check(cufftExecD2Z(d2zk_, ua, uc)); gpuFor( - dip,nk/2+1,djq, - GPU_LAMBDA(const int i, const int k, const int j) { - if (k == 0) { - const int q0 = (nk-1)/dkq; - const int k0 = (nk-1)%dkq; - ua[((q0*dip+i)*dkq+k0)*djq+j] = 2.0*ub[(i*djq+j)*nk2]; - } else if (k+k == nk) { - const int qa = (nk/2-1)/dkq; - const int ka = (nk/2-1)%dkq; - ua[((qa*dip+i)*dkq+ka)*djq+j] = sqrt2*ub[(i*djq+j)*nk2+nk]; - } else { - const int qa = (nk-k-1)/dkq; - const int ka = (nk-k-1)%dkq; - const int qb = (k-1)/dkq; - const int kb = (k-1)%dkq; - const double ak = 2.0*ub[(i*djq+j)*nk2+2*k]; - const double bk = 2.0*ub[(i*djq+j)*nk2+2*k+1]; - double wa,wb; - sincospi(double(k)/double(nk+nk),&wb,&wa); - ua[((qa*dip+i)*dkq+ka)*djq+j] = wa*ak+wb*bk; - ua[((qb*dip+i)*dkq+kb)*djq+j] = wb*ak-wa*bk; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*dkq*djq,MPI_DOUBLE,hb_,dip*dkq*djq,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*dkq*djq,MPI_DOUBLE,ub,dip*dkq*djq,MPI_DOUBLE,commJ_); -#endif + dip, nk / 2 + 1, 
djq, GPU_LAMBDA(const int i, const int k, const int j) { + if (k == 0) { + const int q0 = (nk - 1) / dkq; + const int k0 = (nk - 1) % dkq; + ua[((q0 * dip + i) * dkq + k0) * djq + j] = 2.0 * ub[(i * djq + j) * nk2]; + } else if (k + k == nk) { + const int qa = (nk / 2 - 1) / dkq; + const int ka = (nk / 2 - 1) % dkq; + ua[((qa * dip + i) * dkq + ka) * djq + j] = sqrt2 * ub[(i * djq + j) * nk2 + nk]; + } else { + const int qa = (nk - k - 1) / dkq; + const int ka = (nk - k - 1) % dkq; + const int qb = (k - 1) / dkq; + const int kb = (k - 1) % dkq; + const double ak = 2.0 * ub[(i * djq + j) * nk2 + 2 * k]; + const double bk = 2.0 * ub[(i * djq + j) * nk2 + 2 * k + 1]; + double wa, wb; + sincospi(double(k) / double(nk + nk), &wb, &wa); + ua[((qa * dip + i) * dkq + ka) * djq + j] = wa * ak + wb * bk; + ua[((qb * dip + i) * dkq + kb) * djq + j] = wb * ak - wa * bk; + } + }); + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * dkq * djq, MPI_DOUBLE, hb_, dip * dkq * djq, MPI_DOUBLE, commJ_); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(ua, dip * dkq * djq, MPI_DOUBLE, ub, dip * dkq * djq, MPI_DOUBLE, commJ_); + #endif gpuFor( - dip,dkq,nj/2+1, - GPU_LAMBDA(const int i, const int k, const int j) { - const int ik = (i*dkq+k)*nj; - if (j == 0) { - ua[ik] = ub[(i*dkq+k)*djq]; - } else if (j+j == nj) { - const int qa = (nj-1)/djq; - const int ja = (nj-1)%djq; - ua[ik+nj/2] = -ub[((qa*dip+i)*dkq+k)*djq+ja]; - } else { - const int qa = (j+j-1)/djq; - const int ja = (j+j-1)%djq; - ua[ik+nj-j] = -ub[((qa*dip+i)*dkq+k)*djq+ja]; - const int qb = (j+j)/djq; - const int jb = (j+j)%djq; - ua[ik+j] = ub[((qb*dip+i)*dkq+k)*djq+jb]; - } - }); - CHECK(cufftExecD2Z(d2zj_,ua,uc)); + dip, dkq, nj / 2 + 1, GPU_LAMBDA(const int i, const int k, const int j) { + const int ik = (i * dkq + k) * nj; + if (j == 0) { + ua[ik] = ub[(i * dkq 
+ k) * djq]; + } else if (j + j == nj) { + const int qa = (nj - 1) / djq; + const int ja = (nj - 1) % djq; + ua[ik + nj / 2] = -ub[((qa * dip + i) * dkq + k) * djq + ja]; + } else { + const int qa = (j + j - 1) / djq; + const int ja = (j + j - 1) % djq; + ua[ik + nj - j] = -ub[((qa * dip + i) * dkq + k) * djq + ja]; + const int qb = (j + j) / djq; + const int jb = (j + j) % djq; + ua[ik + j] = ub[((qb * dip + i) * dkq + k) * djq + jb]; + } + }); + GPU_Error_Check(cufftExecD2Z(d2zj_, ua, uc)); gpuFor( - dkq,nj/2+1,dip, - GPU_LAMBDA(const int k, const int j, const int i) { - if (j == 0) { - const int pa = (nj-1)/djp; - const int ja = (nj-1)%djp; - ua[((pa*dkq+k)*djp+ja)*dip+i] = 2.0*ub[(i*dkq+k)*nj2]; - } else if (j+j == nj) { - const int pa = (nj/2-1)/djp; - const int ja = (nj/2-1)%djp; - ua[((pa*dkq+k)*djp+ja)*dip+i] = sqrt2*ub[(i*dkq+k)*nj2+nj]; - } else { - const double aj = 2.0*ub[(i*dkq+k)*nj2+2*j]; - const double bj = 2.0*ub[(i*dkq+k)*nj2+2*j+1]; - double wa,wb; - sincospi(double(j)/double(nj+nj),&wb,&wa); - const int pa = (nj-j-1)/djp; - const int ja = (nj-j-1)%djp; - const int pb = (j-1)/djp; - const int jb = (j-1)%djp; - ua[((pa*dkq+k)*djp+ja)*dip+i] = wa*aj+wb*bj; - ua[((pb*dkq+k)*djp+jb)*dip+i] = wb*aj-wa*bj; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dkq*djp*dip,MPI_DOUBLE,hb_,dkq*djp*dip,MPI_DOUBLE,commI_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dkq*djp*dip,MPI_DOUBLE,ub,dkq*djp*dip,MPI_DOUBLE,commI_); -#endif + dkq, nj / 2 + 1, dip, GPU_LAMBDA(const int k, const int j, const int i) { + if (j == 0) { + const int pa = (nj - 1) / djp; + const int ja = (nj - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = 2.0 * ub[(i * dkq + k) * nj2]; + } else if (j + j == nj) { + const int pa = (nj / 2 - 1) / djp; + const int ja = (nj / 2 - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = sqrt2 * ub[(i * dkq + 
k) * nj2 + nj]; + } else { + const double aj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j]; + const double bj = 2.0 * ub[(i * dkq + k) * nj2 + 2 * j + 1]; + double wa, wb; + sincospi(double(j) / double(nj + nj), &wb, &wa); + const int pa = (nj - j - 1) / djp; + const int ja = (nj - j - 1) % djp; + const int pb = (j - 1) / djp; + const int jb = (j - 1) % djp; + ua[((pa * dkq + k) * djp + ja) * dip + i] = wa * aj + wb * bj; + ua[((pb * dkq + k) * djp + jb) * dip + i] = wb * aj - wa * bj; + } + }); + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_); + #endif gpuFor( - dkq,djp,ni/2+1, - GPU_LAMBDA(const int k, const int j, const int i) { - const int kj = (k*djp+j)*ni; - if (i == 0) { - ua[kj] = ub[(k*djp+j)*dip]; - } else if (i+i == ni) { - const int ida = (ni-1)/di; - const int pa = (ni-1)%di/dip; - const int ia = ni-1-ida*di-pa*dip; - ua[kj+ni/2] = -ub[(((ida*mp+pa)*dkq+k)*djp+j)*dip+ia]; - } else { - const int ida = (i+i-1)/di; - const int pa = (i+i-1)%di/dip; - const int ia = i+i-1-ida*di-pa*dip; - ua[kj+ni-i] = -ub[(((ida*mp+pa)*dkq+k)*djp+j)*dip+ia]; - const int idb = (i+i)/di; - const int pb = (i+i)%di/dip; - const int ib = i+i-idb*di-pb*dip; - ua[kj+i] = ub[(((idb*mp+pb)*dkq+k)*djp+j)*dip+ib]; - } - }); - CHECK(cufftExecD2Z(d2zi_,ua,uc)); - { -#ifdef PARIS_GALACTIC_3PT - const double si = M_PI/double(ni+ni); - const double sj = M_PI/double(nj+nj); - const double sk = M_PI/double(nk+nk); - const double iin = sqr(sin(double(ni)*si)*ddi); -#elif defined PARIS_GALACTIC_5PT - const double si = M_PI/double(ni); - const double sj = M_PI/double(nj); - const double sk = M_PI/double(nk); - const double cin = cos(double(ni)*si); - 
const double iin = ddi*(2.0*cin*cin-16.0*cin+14.0); -#else - const double iin = sqr(double(ni)*ddi); -#endif - const int jLo = (idi*mp+idp)*djp; - const int kLo = (idj*mq+idq)*dkq; - gpuFor( - dkq,djp,ni/2+1, - GPU_LAMBDA(const int k, const int j, const int i) { - const int kj = (k*djp+j)*ni; - const int kj2 = (k*djp+j)*ni2; -#ifdef PARIS_GALACTIC_3PT - const double jjkk = sqr(sin(double(jLo+j+1)*sj)*ddj)+sqr(sin(double(kLo+k+1)*sk)*ddk); -#elif defined PARIS_GALACTIC_5PT - const double cj = cos(double(jLo+j+1)*sj); - const double jj = ddj*(2.0*cj*cj-16.0*cj+14.0); - const double ck = cos(double(kLo+k+1)*sk); - const double kk = ddk*(2.0*ck*ck-16.0*ck+14.0); - const double jjkk = jj+kk; -#else - const double jjkk = sqr(double(jLo+j+1)*ddj)+sqr(double(kLo+k+1)*ddk); -#endif + dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) { + const int kj = (k * djp + j) * ni; if (i == 0) { - ua[kj] = -2.0*ub[kj2]/(iin+jjkk); + ua[kj] = ub[(k * djp + j) * dip]; + } else if (i + i == ni) { + const int ida = (ni - 1) / di; + const int pa = (ni - 1) % di / dip; + const int ia = ni - 1 - ida * di - pa * dip; + ua[kj + ni / 2] = -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; } else { -#ifdef PARIS_GALACTIC_3PT - const double ii = sqr(sin(double(i)*si)*ddi); -#elif defined PARIS_GALACTIC_5PT - const double ci = cos(double(i)*si); - const double ii = ddi*(2.0*ci*ci-16.0*ci+14.0); -#else - const double ii = sqr(double(i)*ddi); -#endif - if (i+i == ni) { - ua[kj+ni/2] = -2.0*ub[kj2+ni]/(ii+jjkk); - } else { - const double ai = 2.0*ub[kj2+2*i]; - const double bi = 2.0*ub[kj2+2*i+1]; - double wa,wb; - sincospi(double(i)/double(ni+ni),&wb,&wa); -#ifdef PARIS_GALACTIC_3PT - const double nii = sqr(sin(double(ni-i)*si)*ddi); -#elif defined PARIS_GALACTIC_5PT - const double cni = cos(double(ni-i)*si); - const double nii = ddi*(2.0*cni*cni-16.0*cni+14.0); -#else - const double nii = sqr(double(ni-i)*ddi); -#endif - const double aai = -(wa*ai+wb*bi)/(nii+jjkk); - 
const double bbi = (wa*bi-wb*ai)/(ii+jjkk); - const double apb = aai+bbi; - const double amb = aai-bbi; - ua[kj+i] = wa*amb+wb*apb; - ua[kj+ni-i] = wa*apb-wb*amb; - } + const int ida = (i + i - 1) / di; + const int pa = (i + i - 1) % di / dip; + const int ia = i + i - 1 - ida * di - pa * dip; + ua[kj + ni - i] = -ub[(((ida * mp + pa) * dkq + k) * djp + j) * dip + ia]; + const int idb = (i + i) / di; + const int pb = (i + i) % di / dip; + const int ib = i + i - idb * di - pb * dip; + ua[kj + i] = ub[(((idb * mp + pb) * dkq + k) * djp + j) * dip + ib]; } }); + GPU_Error_Check(cufftExecD2Z(d2zi_, ua, uc)); + { + #ifdef PARIS_GALACTIC_3PT + const double si = M_PI / double(ni + ni); + const double sj = M_PI / double(nj + nj); + const double sk = M_PI / double(nk + nk); + const double iin = Sqr(sin(double(ni) * si) * ddi); + #elif defined PARIS_GALACTIC_5PT + const double si = M_PI / double(ni); + const double sj = M_PI / double(nj); + const double sk = M_PI / double(nk); + const double cin = cos(double(ni) * si); + const double iin = ddi * (2.0 * cin * cin - 16.0 * cin + 14.0); + #else + const double iin = Sqr(double(ni) * ddi); + #endif + const int jLo = (idi * mp + idp) * djp; + const int kLo = (idj * mq + idq) * dkq; + gpuFor( + dkq, djp, ni / 2 + 1, GPU_LAMBDA(const int k, const int j, const int i) { + const int kj = (k * djp + j) * ni; + const int kj2 = (k * djp + j) * ni2; + #ifdef PARIS_GALACTIC_3PT + const double jjkk = Sqr(sin(double(jLo + j + 1) * sj) * ddj) + Sqr(sin(double(kLo + k + 1) * sk) * ddk); + #elif defined PARIS_GALACTIC_5PT + const double cj = cos(double(jLo + j + 1) * sj); + const double jj = ddj * (2.0 * cj * cj - 16.0 * cj + 14.0); + const double ck = cos(double(kLo + k + 1) * sk); + const double kk = ddk * (2.0 * ck * ck - 16.0 * ck + 14.0); + const double jjkk = jj + kk; + #else + const double jjkk = + Sqr(double(jLo + j + 1) * ddj) + Sqr(double(kLo + k + 1) * ddk); + #endif + if (i == 0) { + ua[kj] = -2.0 * ub[kj2] / (iin + jjkk); + } else { 
+ #ifdef PARIS_GALACTIC_3PT + const double ii = Sqr(sin(double(i) * si) * ddi); + #elif defined PARIS_GALACTIC_5PT + const double ci = cos(double(i) * si); + const double ii = ddi * (2.0 * ci * ci - 16.0 * ci + 14.0); + #else + const double ii = Sqr(double(i) * ddi); + #endif + if (i + i == ni) { + ua[kj + ni / 2] = -2.0 * ub[kj2 + ni] / (ii + jjkk); + } else { + const double ai = 2.0 * ub[kj2 + 2 * i]; + const double bi = 2.0 * ub[kj2 + 2 * i + 1]; + double wa, wb; + sincospi(double(i) / double(ni + ni), &wb, &wa); + #ifdef PARIS_GALACTIC_3PT + const double nii = Sqr(sin(double(ni - i) * si) * ddi); + #elif defined PARIS_GALACTIC_5PT + const double cni = cos(double(ni - i) * si); + const double nii = ddi * (2.0 * cni * cni - 16.0 * cni + 14.0); + #else + const double nii = Sqr(double(ni - i) * ddi); + #endif + const double aai = -(wa * ai + wb * bi) / (nii + jjkk); + const double bbi = (wa * bi - wb * ai) / (ii + jjkk); + const double apb = aai + bbi; + const double amb = aai - bbi; + ua[kj + i] = wa * amb + wb * apb; + ua[kj + ni - i] = wa * apb - wb * amb; + } + } + }); } - CHECK(cufftExecD2Z(d2zi_,ua,uc)); + GPU_Error_Check(cufftExecD2Z(d2zi_, ua, uc)); gpuFor( - dkq,ni/2+1,djp, - GPU_LAMBDA(const int k, const int i, const int j) { - if (i == 0) { - ua[k*dip*djp+j] = ub[(k*djp+j)*ni2]; - } else if (i+i == ni) { - const int ida = (ni-1)/di; - const int pa = (ni-1)%di/dip; - const int ia = ni-1-ida*di-pa*dip; - ua[(((ida*mp+pa)*dkq+k)*dip+ia)*djp+j] = -ub[(k*djp+j)*ni2+ni]; - } else { - const double ai = ub[(k*djp+j)*ni2+i+i]; - const double bi = ub[(k*djp+j)*ni2+i+i+1]; - const int ida = (i+i-1)/di; - const int pa = (i+i-1)%di/dip; - const int ia = i+i-1-ida*di-pa*dip; - ua[(((ida*mp+pa)*dkq+k)*dip+ia)*djp+j] = bi-ai; - const int idb = (i+i)/di; - const int pb = (i+i)%di/dip; - const int ib = i+i-idb*di-pb*dip; - ua[(((idb*mp+pb)*dkq+k)*dip+ib)*djp+j] = ai+bi; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - 
MPI_Alltoall(ha_,dkq*djp*dip,MPI_DOUBLE,hb_,dkq*djp*dip,MPI_DOUBLE,commI_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dkq*djp*dip,MPI_DOUBLE,ub,dkq*djp*dip,MPI_DOUBLE,commI_); -#endif + dkq, ni / 2 + 1, djp, GPU_LAMBDA(const int k, const int i, const int j) { + if (i == 0) { + ua[k * dip * djp + j] = ub[(k * djp + j) * ni2]; + } else if (i + i == ni) { + const int ida = (ni - 1) / di; + const int pa = (ni - 1) % di / dip; + const int ia = ni - 1 - ida * di - pa * dip; + ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = -ub[(k * djp + j) * ni2 + ni]; + } else { + const double ai = ub[(k * djp + j) * ni2 + i + i]; + const double bi = ub[(k * djp + j) * ni2 + i + i + 1]; + const int ida = (i + i - 1) / di; + const int pa = (i + i - 1) % di / dip; + const int ia = i + i - 1 - ida * di - pa * dip; + ua[(((ida * mp + pa) * dkq + k) * dip + ia) * djp + j] = bi - ai; + const int idb = (i + i) / di; + const int pb = (i + i) % di / dip; + const int ib = i + i - idb * di - pb * dip; + ua[(((idb * mp + pb) * dkq + k) * dip + ib) * djp + j] = ai + bi; + } + }); + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dkq * djp * dip, MPI_DOUBLE, hb_, dkq * djp * dip, MPI_DOUBLE, commI_); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(ua, dkq * djp * dip, MPI_DOUBLE, ub, dkq * djp * dip, MPI_DOUBLE, commI_); + #endif gpuFor( - dkq,dip,nj/2+1, - GPU_LAMBDA(const int k, const int i, const int j) { - const long ki = (k*dip+i)*nj; - if (j == 0) { - const int pa = (nj-1)/djp; - const int ja = (nj-1)-pa*djp; - ua[ki] = ub[((pa*dkq+k)*dip+i)*djp+ja]; - } else if (j+j == nj) { - const int pa = (nj/2-1)/djp; - const int ja = nj/2-1-pa*djp; - ua[ki+nj/2] = sqrt2*ub[((pa*dkq+k)*dip+i)*djp+ja]; - } else { - const int pa = (nj-1-j)/djp; - const int ja = 
nj-1-j-pa*djp; - const double aj = ub[((pa*dkq+k)*dip+i)*djp+ja]; - const int pb = (j-1)/djp; - const int jb = j-1-pb*djp; - const double bj = ub[((pb*dkq+k)*dip+i)*djp+jb]; - const double apb = aj+bj; - const double amb = aj-bj; - double wa,wb; - sincospi(double(j)/double(nj+nj),&wb,&wa); - ua[ki+j] = wa*amb+wb*apb; - ua[ki+nj-j] = wa*apb-wb*amb; - } - }); - CHECK(cufftExecD2Z(d2zj_,ua,uc)); + dkq, dip, nj / 2 + 1, GPU_LAMBDA(const int k, const int i, const int j) { + const long ki = (k * dip + i) * nj; + if (j == 0) { + const int pa = (nj - 1) / djp; + const int ja = (nj - 1) - pa * djp; + ua[ki] = ub[((pa * dkq + k) * dip + i) * djp + ja]; + } else if (j + j == nj) { + const int pa = (nj / 2 - 1) / djp; + const int ja = nj / 2 - 1 - pa * djp; + ua[ki + nj / 2] = sqrt2 * ub[((pa * dkq + k) * dip + i) * djp + ja]; + } else { + const int pa = (nj - 1 - j) / djp; + const int ja = nj - 1 - j - pa * djp; + const double aj = ub[((pa * dkq + k) * dip + i) * djp + ja]; + const int pb = (j - 1) / djp; + const int jb = j - 1 - pb * djp; + const double bj = ub[((pb * dkq + k) * dip + i) * djp + jb]; + const double apb = aj + bj; + const double amb = aj - bj; + double wa, wb; + sincospi(double(j) / double(nj + nj), &wb, &wa); + ua[ki + j] = wa * amb + wb * apb; + ua[ki + nj - j] = wa * apb - wb * amb; + } + }); + GPU_Error_Check(cufftExecD2Z(d2zj_, ua, uc)); gpuFor( - dip,nj/2+1,dkq, - GPU_LAMBDA(const int i, const int j, const int k) { - if (j == 0) { - ua[i*djq*dkq+k] = ub[(k*dip+i)*nj2]; - } else if (j+j == nj) { - const int ida = (nj-1)/dj; - const int qa = (nj-1)%dj/djq; - const int ja = nj-1-ida*dj-qa*djq; - ua[(((ida*mq+qa)*dip+i)*djq+ja)*dkq+k] = -ub[(k*dip+i)*nj2+nj]; - } else { - const int jj = j+j; - const int ida = (jj-1)/dj; - const int qa = (jj-1)%dj/djq; - const int ja = jj-1-ida*dj-qa*djq; - const int idb = jj/dj; - const int qb = jj%dj/djq; - const int jb = jj-idb*dj-qb*djq; - const double aj = ub[(k*dip+i)*nj2+jj]; - const double bj = 
ub[(k*dip+i)*nj2+jj+1]; - ua[(((ida*mq+qa)*dip+i)*djq+ja)*dkq+k] = bj-aj; - ua[(((idb*mq+qb)*dip+i)*djq+jb)*dkq+k] = aj+bj; - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*djq*dkq,MPI_DOUBLE,hb_,dip*djq*dkq,MPI_DOUBLE,commJ_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*djq*dkq,MPI_DOUBLE,ub,dip*djq*dkq,MPI_DOUBLE,commJ_); -#endif + dip, nj / 2 + 1, dkq, GPU_LAMBDA(const int i, const int j, const int k) { + if (j == 0) { + ua[i * djq * dkq + k] = ub[(k * dip + i) * nj2]; + } else if (j + j == nj) { + const int ida = (nj - 1) / dj; + const int qa = (nj - 1) % dj / djq; + const int ja = nj - 1 - ida * dj - qa * djq; + ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = -ub[(k * dip + i) * nj2 + nj]; + } else { + const int jj = j + j; + const int ida = (jj - 1) / dj; + const int qa = (jj - 1) % dj / djq; + const int ja = jj - 1 - ida * dj - qa * djq; + const int idb = jj / dj; + const int qb = jj % dj / djq; + const int jb = jj - idb * dj - qb * djq; + const double aj = ub[(k * dip + i) * nj2 + jj]; + const double bj = ub[(k * dip + i) * nj2 + jj + 1]; + ua[(((ida * mq + qa) * dip + i) * djq + ja) * dkq + k] = bj - aj; + ua[(((idb * mq + qb) * dip + i) * djq + jb) * dkq + k] = aj + bj; + } + }); + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * djq * dkq, MPI_DOUBLE, hb_, dip * djq * dkq, MPI_DOUBLE, commJ_); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(ua, dip * djq * dkq, MPI_DOUBLE, ub, dip * djq * dkq, MPI_DOUBLE, commJ_); + #endif gpuFor( - dip,djq,nk/2+1, - GPU_LAMBDA(const int i, const int j, const int k) { - const long ij = (i*djq+j)*nk; - if (k == 0) { - const int qa = (nk-1)/dkq; - const int ka = nk-1-qa*dkq; - ua[ij] = 
ub[((qa*dip+i)*djq+j)*dkq+ka]; - } else if (k+k == nk) { - const int qa = (nk/2-1)/dkq; - const int ka = nk/2-1-qa*dkq; - ua[ij+nk/2] = sqrt2*ub[((qa*dip+i)*djq+j)*dkq+ka]; - } else { - const int qa = (nk-1-k)/dkq; - const int ka = nk-1-k-qa*dkq; - const double ak = ub[((qa*dip+i)*djq+j)*dkq+ka]; - const int qb = (k-1)/dkq; - const int kb = k-1-qb*dkq; - const double bk = ub[((qb*dip+i)*djq+j)*dkq+kb]; - const double apb = ak+bk; - const double amb = ak-bk; - double wa,wb; - sincospi(double(k)/double(nk+nk),&wb,&wa); - ua[ij+k] = wa*amb+wb*apb; - ua[ij+nk-k] = wa*apb-wb*amb; - } - }); - CHECK(cufftExecD2Z(d2zk_,ua,uc)); - const double divN = 1.0/(8.0*double(ni)*double(nj)*double(nk)); + dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { + const long ij = (i * djq + j) * nk; + if (k == 0) { + const int qa = (nk - 1) / dkq; + const int ka = nk - 1 - qa * dkq; + ua[ij] = ub[((qa * dip + i) * djq + j) * dkq + ka]; + } else if (k + k == nk) { + const int qa = (nk / 2 - 1) / dkq; + const int ka = nk / 2 - 1 - qa * dkq; + ua[ij + nk / 2] = sqrt2 * ub[((qa * dip + i) * djq + j) * dkq + ka]; + } else { + const int qa = (nk - 1 - k) / dkq; + const int ka = nk - 1 - k - qa * dkq; + const double ak = ub[((qa * dip + i) * djq + j) * dkq + ka]; + const int qb = (k - 1) / dkq; + const int kb = k - 1 - qb * dkq; + const double bk = ub[((qb * dip + i) * djq + j) * dkq + kb]; + const double apb = ak + bk; + const double amb = ak - bk; + double wa, wb; + sincospi(double(k) / double(nk + nk), &wb, &wa); + ua[ij + k] = wa * amb + wb * apb; + ua[ij + nk - k] = wa * apb - wb * amb; + } + }); + GPU_Error_Check(cufftExecD2Z(d2zk_, ua, uc)); + const double divN = 1.0 / (8.0 * double(ni) * double(nj) * double(nk)); gpuFor( - dip,djq,nk/2+1, - GPU_LAMBDA(const int i, const int j, const int k) { - if (k == 0) { - ua[(i*djq+j)*dk] = divN*ub[(i*djq+j)*nk2]; - } else if (k+k == nk) { - const int pqa = (nk-1)/dk; - const int ka = nk-1-pqa*dk; - ua[((pqa*dip+i)*djq+j)*dk+ka] 
= -divN*ub[(i*djq+j)*nk2+nk]; - } else { - const int kk = k+k; - const double ak = ub[(i*djq+j)*nk2+kk]; - const double bk = ub[(i*djq+j)*nk2+kk+1]; - const int pqa = (kk-1)/dk; - const int ka = kk-1-pqa*dk; - ua[((pqa*dip+i)*djq+j)*dk+ka] = divN*(bk-ak); - const int pqb = kk/dk; - const int kb = kk-pqb*dk; - ua[((pqb*dip+i)*djq+j)*dk+kb] = divN*(ak+bk); - } - }); -#ifndef MPI_GPU - CHECK(cudaMemcpy(ha_,ua,bytes_,cudaMemcpyDeviceToHost)); - MPI_Alltoall(ha_,dip*djq*dk,MPI_DOUBLE,hb_,dip*djq*dk,MPI_DOUBLE,commK_); - CHECK(cudaMemcpyAsync(ub,hb_,bytes_,cudaMemcpyHostToDevice,0)); -#else - CHECK(cudaDeviceSynchronize()); - MPI_Alltoall(ua,dip*djq*dk,MPI_DOUBLE,ub,dip*djq*dk,MPI_DOUBLE,commK_); -#endif + dip, djq, nk / 2 + 1, GPU_LAMBDA(const int i, const int j, const int k) { + if (k == 0) { + ua[(i * djq + j) * dk] = divN * ub[(i * djq + j) * nk2]; + } else if (k + k == nk) { + const int pqa = (nk - 1) / dk; + const int ka = nk - 1 - pqa * dk; + ua[((pqa * dip + i) * djq + j) * dk + ka] = -divN * ub[(i * djq + j) * nk2 + nk]; + } else { + const int kk = k + k; + const double ak = ub[(i * djq + j) * nk2 + kk]; + const double bk = ub[(i * djq + j) * nk2 + kk + 1]; + const int pqa = (kk - 1) / dk; + const int ka = kk - 1 - pqa * dk; + ua[((pqa * dip + i) * djq + j) * dk + ka] = divN * (bk - ak); + const int pqb = kk / dk; + const int kb = kk - pqb * dk; + ua[((pqb * dip + i) * djq + j) * dk + kb] = divN * (ak + bk); + } + }); + #ifndef MPI_GPU + GPU_Error_Check(cudaMemcpy(ha_, ua, bytes_, cudaMemcpyDeviceToHost)); + MPI_Alltoall(ha_, dip * djq * dk, MPI_DOUBLE, hb_, dip * djq * dk, MPI_DOUBLE, commK_); + GPU_Error_Check(cudaMemcpyAsync(ub, hb_, bytes_, cudaMemcpyHostToDevice, 0)); + #else + GPU_Error_Check(cudaDeviceSynchronize()); + MPI_Alltoall(ua, dip * djq * dk, MPI_DOUBLE, ub, dip * djq * dk, MPI_DOUBLE, commK_); + #endif gpuFor( - mp,dip,mq,djq,dk, - GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { - const int iLo = p*dip; - const int 
jLo = q*djq; - if ((iLo+i < di) && (jLo+j < dj)) ua[((i+iLo)*dj+j+jLo)*dk+k] = ub[(((p*mq+q)*dip+i)*djq+j)*dk+k]; - }); + mp, dip, mq, djq, dk, GPU_LAMBDA(const int p, const int i, const int q, const int j, const int k) { + const int iLo = p * dip; + const int jLo = q * djq; + if ((iLo + i < di) && (jLo + j < dj)) { + ua[((i + iLo) * dj + j + jLo) * dk + k] = ub[(((p * mq + q) * dip + i) * djq + j) * dk + k]; + } + }); } #endif diff --git a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp index 8d868b54d..0094f5b0d 100644 --- a/src/gravity/paris/PoissonZero3DBlockedGPU.hpp +++ b/src/gravity/paris/PoissonZero3DBlockedGPU.hpp @@ -1,28 +1,31 @@ #pragma once #include + #include "../../utils/gpu.hpp" -class PoissonZero3DBlockedGPU { - public: - PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); - ~PoissonZero3DBlockedGPU(); - long bytes() const { return bytes_; } - void solve(long bytes, double *density, double *potential) const; - private: - double ddi_,ddj_,ddk_; - int idi_,idj_,idk_; - int mi_,mj_,mk_; - int ni_,nj_,nk_; - int mp_,mq_; - int idp_,idq_; - MPI_Comm commI_,commJ_,commK_; - int di_,dj_,dk_; - int dip_,djp_,djq_,dkq_; - int ni2_,nj2_,nk2_; - long bytes_; - cufftHandle d2zi_,d2zj_,d2zk_; +class PoissonZero3DBlockedGPU +{ + public: + PoissonZero3DBlockedGPU(const int n[3], const double lo[3], const double hi[3], const int m[3], const int id[3]); + ~PoissonZero3DBlockedGPU(); + long bytes() const { return bytes_; } + void solve(long bytes, double *density, double *potential) const; + + private: + double ddi_, ddj_, ddk_; + int idi_, idj_, idk_; + int mi_, mj_, mk_; + int ni_, nj_, nk_; + int mp_, mq_; + int idp_, idq_; + MPI_Comm commI_, commJ_, commK_; + int di_, dj_, dk_; + int dip_, djp_, djq_, dkq_; + int ni2_, nj2_, nk2_; + long bytes_; + cufftHandle d2zi_, d2zj_, d2zk_; #ifndef MPI_GPU - double *ha_, *hb_; + double *ha_, *hb_; #endif }; diff 
--git a/src/gravity/paris/README.md b/src/gravity/paris/README.md index a73664fa3..d019d5e1f 100644 --- a/src/gravity/paris/README.md +++ b/src/gravity/paris/README.md @@ -8,7 +8,7 @@ A 3D Poisson solver that expects periodic boundary conditions. *ParisPeriodic* calls the FFT filter provided by the *HenryPeriodic* class, where it provides a C++ lambda function that solves the Poisson equation in frequency space. It assumes fields in a 3D block distribution with no ghost cells. -It is used by the Cholla class *Potential_Paris_3D* to solve Poisson problems with periodic boundary conditions. +It is used by the Cholla class *PotentialParis3D* to solve Poisson problems with periodic boundary conditions. To use: - Construct a *ParisPeriodic* object using information about the global domain and local MPI task. @@ -44,12 +44,12 @@ A 3D Poisson solver that expects zero-valued boundary conditions. *PoissonZero3DBlockedGPU* uses discrete sine transforms (DSTs) instead of Fourier transforms to enforce zero-valued, non-periodic boundary conditions. It is currently a monolithic class, not depenedent on a *Henry* class. -It is used by the Cholla class *Potential_Paris_Galactic* to solve Poisson problems with non-zero, non-periodic, analytic boundary conditions. +It is used by the Cholla class *PotentialParisGalactic* to solve Poisson problems with non-zero, non-periodic, analytic boundary conditions. -*Potential_Paris_Galactic::Get_Potential()* uses *PoissonZero3DBlockedGPU::solve()* as follows. +*PotentialParisGalactic::Get_Potential()* uses *PoissonZero3DBlockedGPU::solve()* as follows. - Subtract an analytic density from the input density, where the analytic density matches the input density at the domain boundaries. This results in a density with zero-valued boundaries. -- Call *PoissonZero3DBlockedGPU::solve()* with this density with zero-valued boundaries. +- Call *PoissonZero3DBlockedGPU::solve()* with this density with zero-valued boundaries. 
- Add an analytic potential to the resulting potential, where the analytic potential is the solution to the Poisson equation for the analytic density that was subtracted from the input density. The resulting sum of potentials is the solution to the Poisson problem for the full input density. diff --git a/src/gravity/potential_SOR_3D.cpp b/src/gravity/potential_SOR_3D.cpp index a7a0b4d2f..0cffeb981 100644 --- a/src/gravity/potential_SOR_3D.cpp +++ b/src/gravity/potential_SOR_3D.cpp @@ -1,20 +1,22 @@ #if defined(GRAVITY) && defined(SOR) -#include "../gravity/potential_SOR_3D.h" -#include "../io/io.h" -#include -#include -#include "../grid/grid3D.h" + #include "../gravity/potential_SOR_3D.h" -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include + #include + #include "../grid/grid3D.h" + #include "../io/io.h" -Potential_SOR_3D::Potential_SOR_3D( void ){} + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif -void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real){ +Potential_SOR_3D::Potential_SOR_3D(void) {} +void Potential_SOR_3D::Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, + int nx_real, int ny_real, int nz_real, Real dx_real, Real dy_real, Real dz_real) +{ Lbox_x = Lx; Lbox_y = Ly; Lbox_z = Lz; @@ -33,13 +35,13 @@ void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y n_ghost = N_GHOST_POTENTIAL; - nx_pot = nx_local + 2*n_ghost; - ny_pot = ny_local + 2*n_ghost; - nz_pot = nz_local + 2*n_ghost; + nx_pot = nx_local + 2 * n_ghost; + ny_pot = ny_local + 2 * n_ghost; + nz_pot = nz_local + 2 * n_ghost; - n_cells_local = nx_local*ny_local*nz_local; - n_cells_potential = nx_pot*ny_pot*nz_pot; - n_cells_total = nx_total*ny_total*nz_total; + n_cells_local = nx_local * ny_local * nz_local; + n_cells_potential = nx_pot * ny_pot 
* nz_pot; + n_cells_total = nx_total * ny_total * nz_total; n_ghost_transfer = 1; @@ -51,114 +53,129 @@ void Potential_SOR_3D::Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y size_buffer_x = n_ghost_transfer * ny_local * nz_local; size_buffer_y = n_ghost_transfer * nx_local * nz_local; size_buffer_z = n_ghost_transfer * nx_local * ny_local; - if ( size_buffer_x%2 !=0 ) chprintf( " SOR Warning: Buffer X not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n"); - else size_buffer_x /= 2; - if ( size_buffer_y%2 !=0 ) chprintf( " SOR Warning: Buffer Y not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n"); - else size_buffer_y /= 2; - if ( size_buffer_z%2 !=0 ) chprintf( " SOR Warning: Buffer Y not divisible by 2, Disable HALF_SIZE_BOUNDARIES \n"); - else size_buffer_z /= 2; + if (size_buffer_x % 2 != 0) + chprintf( + " SOR Warning: Buffer X not divisible by 2, Disable " + "HALF_SIZE_BOUNDARIES \n"); + else + size_buffer_x /= 2; + if (size_buffer_y % 2 != 0) + chprintf( + " SOR Warning: Buffer Y not divisible by 2, Disable " + "HALF_SIZE_BOUNDARIES \n"); + else + size_buffer_y /= 2; + if (size_buffer_z % 2 != 0) + chprintf( + " SOR Warning: Buffer Z not divisible by 2, Disable " + "HALF_SIZE_BOUNDARIES \n"); + else + size_buffer_z /= 2; #endif - //Flag to transfer Poisson Boundaries when calling Set_Boundaries + // Flag to transfer Poisson Boundaries when calling Set_Boundaries TRANSFER_POISSON_BOUNDARIES = false; + chprintf(" Using Poisson Solver: SOR\n"); + chprintf(" SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, + dx, dy, dz); - chprintf( " Using Poisson Solver: SOR\n"); - chprintf( " SOR: L[ %f %f %f ] N[ %d %d %d ] dx[ %f %f %f ]\n", Lbox_x, Lbox_y, Lbox_z, nx_local, ny_local, nz_local, dx, dy, dz ); - - chprintf( " SOR: Allocating memory...\n"); + chprintf(" SOR: Allocating memory...\n"); AllocateMemory_CPU(); AllocateMemory_GPU(); potential_initialized = false; - } - -void 
Potential_SOR_3D::AllocateMemory_CPU( void ){ - F.output_h = (Real *) malloc(n_cells_local*sizeof(Real)); - F.converged_h = (bool *) malloc(sizeof(bool)); - +void Potential_SOR_3D::AllocateMemory_CPU(void) +{ + F.output_h = (Real *)malloc(n_cells_local * sizeof(Real)); + F.converged_h = (bool *)malloc(sizeof(bool)); } - -void Potential_SOR_3D::AllocateMemory_GPU( void ){ - - Allocate_Array_GPU_Real( &F.input_d, n_cells_local ); - Allocate_Array_GPU_Real( &F.density_d, n_cells_local ); - Allocate_Array_GPU_Real( &F.potential_d, n_cells_potential ); - Allocate_Array_GPU_bool( &F.converged_d, 1 ); - Allocate_Array_GPU_Real( &F.boundaries_buffer_x0_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.boundaries_buffer_x1_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.boundaries_buffer_y0_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.boundaries_buffer_y1_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.boundaries_buffer_z0_d, size_buffer_z); - Allocate_Array_GPU_Real( &F.boundaries_buffer_z1_d, size_buffer_z); +void Potential_SOR_3D::AllocateMemory_GPU(void) +{ + Allocate_Array_GPU_Real(&F.input_d, n_cells_local); + Allocate_Array_GPU_Real(&F.density_d, n_cells_local); + Allocate_Array_GPU_Real(&F.potential_d, n_cells_potential); + Allocate_Array_GPU_bool(&F.converged_d, 1); + Allocate_Array_GPU_Real(&F.boundaries_buffer_x0_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.boundaries_buffer_x1_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.boundaries_buffer_y0_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.boundaries_buffer_y1_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.boundaries_buffer_z0_d, size_buffer_z); + Allocate_Array_GPU_Real(&F.boundaries_buffer_z1_d, size_buffer_z); #ifdef MPI_CHOLLA - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_x0_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_x1_d, size_buffer_x); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_y0_d, size_buffer_y); - Allocate_Array_GPU_Real( 
&F.recv_boundaries_buffer_y1_d, size_buffer_y); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_z0_d, size_buffer_z); - Allocate_Array_GPU_Real( &F.recv_boundaries_buffer_z1_d, size_buffer_z); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_x0_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_x1_d, size_buffer_x); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_y0_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_y1_d, size_buffer_y); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_z0_d, size_buffer_z); + Allocate_Array_GPU_Real(&F.recv_boundaries_buffer_z1_d, size_buffer_z); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - Allocate_Array_GPU_Real( &F.boundary_isolated_x0_d, n_ghost*ny_local*nz_local ); - Allocate_Array_GPU_Real( &F.boundary_isolated_x1_d, n_ghost*ny_local*nz_local ); + Allocate_Array_GPU_Real(&F.boundary_isolated_x0_d, n_ghost * ny_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_x1_d, n_ghost * ny_local * nz_local); #endif #ifdef GRAV_ISOLATED_BOUNDARY_X - Allocate_Array_GPU_Real( &F.boundary_isolated_y0_d, n_ghost*nx_local*nz_local ); - Allocate_Array_GPU_Real( &F.boundary_isolated_y1_d, n_ghost*nx_local*nz_local ); + Allocate_Array_GPU_Real(&F.boundary_isolated_y0_d, n_ghost * nx_local * nz_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_y1_d, n_ghost * nx_local * nz_local); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Allocate_Array_GPU_Real( &F.boundary_isolated_z0_d, n_ghost*nx_local*ny_local ); - Allocate_Array_GPU_Real( &F.boundary_isolated_z1_d, n_ghost*nx_local*ny_local ); + Allocate_Array_GPU_Real(&F.boundary_isolated_z0_d, n_ghost * nx_local * ny_local); + Allocate_Array_GPU_Real(&F.boundary_isolated_z1_d, n_ghost * nx_local * ny_local); #endif - } -void Potential_SOR_3D::Copy_Input_And_Initialize( Real *input_density, const Real *const input_potential, Real Grav_Constant, Real dens_avrg, Real current_a ){ - Copy_Input( n_cells_local, F.input_d, input_density, 
Grav_Constant, dens_avrg, current_a ); - - if ( !potential_initialized ){ - chprintf( "SOR: Initializing Potential \n"); - CHECK( cudaMemcpy( F.potential_d, input_potential, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice ) ); - //Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, F.potential_d, F.density_d ); +void Potential_SOR_3D::Copy_Input_And_Initialize(Real *input_density, const Real *const input_potential, + Real Grav_Constant, Real dens_avrg, Real current_a) +{ + Copy_Input(n_cells_local, F.input_d, input_density, Grav_Constant, dens_avrg, current_a); + + if (!potential_initialized) { + chprintf("SOR: Initializing Potential \n"); + GPU_Error_Check( + cudaMemcpy(F.potential_d, input_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); + // Initialize_Potential( nx_local, ny_local, nz_local, n_ghost, + // F.potential_d, F.density_d ); potential_initialized = true; } } - -void Potential_SOR_3D::Poisson_Partial_Iteration( int n_step, Real omega, Real epsilon ){ - if (n_step == 0 ) Poisson_iteration_Patial_1( n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, F.density_d, F.potential_d, F.converged_h, F.converged_d ); - if (n_step == 1 ) Poisson_iteration_Patial_2( n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, F.density_d, F.potential_d, F.converged_h, F.converged_d ); +void Potential_SOR_3D::Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon) +{ + if (n_step == 0) + Poisson_iteration_Patial_1(n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, + F.density_d, F.potential_d, F.converged_h, F.converged_d); + if (n_step == 1) + Poisson_iteration_Patial_2(n_cells_local, nx_local, ny_local, nz_local, n_ghost, dx, dy, dz, omega, epsilon, + F.density_d, F.potential_d, F.converged_h, F.converged_d); } - -void Grid3D::Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P ){ - +void 
Grid3D::Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct Parameters *P) +{ #ifdef TIME_SOR Real time_start, time_end, time; - time_start = get_time(); + time_start = Get_Time(); #endif - Grav.Poisson_solver.Copy_Input_And_Initialize( Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, current_a ); - - //Set Isolated Boundary Conditions - Grav.Copy_Isolated_Boundaries_To_GPU( P ); - Grav.Poisson_solver.Set_Isolated_Boundary_Conditions( Grav.boundary_flags, P ); + Grav.Poisson_solver.Copy_Input_And_Initialize(Grav.F.density_h, Grav.F.potential_h, Grav_Constant, dens_avrg, + current_a); + // Set Isolated Boundary Conditions + Grav.Copy_Isolated_Boundaries_To_GPU(P); + Grav.Poisson_solver.Set_Isolated_Boundary_Conditions(Grav.boundary_flags, P); Real epsilon = 1e-4; int max_iter = 10000000; - int n_iter = 0; + int n_iter = 0; Grav.Poisson_solver.F.converged_h[0] = 0; // For Diriclet Boundaries - Real omega = 2. / ( 1 + M_PI / Grav.Poisson_solver.nx_total ); + Real omega = 2. / (1 + M_PI / Grav.Poisson_solver.nx_total); // For Periodic Boundaries // Real omega = 2. 
/ ( 1 + 2*M_PI / nx_total ); @@ -166,374 +183,398 @@ void Grid3D::Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current bool set_boundaries; - //Number of iterations in between boundary transfers + // Number of iterations in between boundary transfers int n_iter_per_boundaries_transfer = 1; - // Iterate to solve Poisson equation - while ( Grav.Poisson_solver.F.converged_h[0] == 0 ) { - + while (Grav.Poisson_solver.F.converged_h[0] == 0) { set_boundaries = false; - if ( n_iter % n_iter_per_boundaries_transfer == 0 ) set_boundaries = true; + if (n_iter % n_iter_per_boundaries_transfer == 0) set_boundaries = true; // First Partial Iteration Grav.Poisson_solver.iteration_parity = 0; - if ( set_boundaries ){ + if (set_boundaries) { Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = true; - Set_Boundary_Conditions( *P ); + Set_Boundary_Conditions(*P); Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false; } - Grav.Poisson_solver.Poisson_Partial_Iteration( Grav.Poisson_solver.iteration_parity, omega, epsilon ); - + Grav.Poisson_solver.Poisson_Partial_Iteration(Grav.Poisson_solver.iteration_parity, omega, epsilon); // Second Partial Iteration Grav.Poisson_solver.iteration_parity = 1; - if ( set_boundaries ){ + if (set_boundaries) { Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = true; - Set_Boundary_Conditions( *P ); + Set_Boundary_Conditions(*P); Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES = false; } - Grav.Poisson_solver.Poisson_Partial_Iteration( Grav.Poisson_solver.iteration_parity, omega, epsilon ); + Grav.Poisson_solver.Poisson_Partial_Iteration(Grav.Poisson_solver.iteration_parity, omega, epsilon); - // Get convergence state - #ifdef MPI_CHOLLA - Grav.Poisson_solver.F.converged_h[0] = Grav.Poisson_solver.Get_Global_Converged( Grav.Poisson_solver.F.converged_h[0] ); - #endif + // Get convergence state + #ifdef MPI_CHOLLA + Grav.Poisson_solver.F.converged_h[0] = + Grav.Poisson_solver.Get_Global_Converged(Grav.Poisson_solver.F.converged_h[0]); + 
#endif - //Only aloow to connverge after the boundaries have been transfere to avoid false convergence in the boundaries. - if ( set_boundaries == false ) Grav.Poisson_solver.F.converged_h[0] = 0; + // Only aloow to connverge after the boundaries have been transfere to avoid + // false convergence in the boundaries. + if (set_boundaries == false) Grav.Poisson_solver.F.converged_h[0] = 0; n_iter += 1; - if ( n_iter == max_iter ) break; + if (n_iter == max_iter) break; } - if ( n_iter == max_iter ) chprintf(" SOR: No convergence in %d iterations \n", n_iter); - else chprintf(" SOR: Converged in %d iterations \n", n_iter); + if (n_iter == max_iter) + chprintf(" SOR: No convergence in %d iterations \n", n_iter); + else + chprintf(" SOR: Converged in %d iterations \n", n_iter); - Grav.Poisson_solver.Copy_Output( Grav.F.potential_h ); + Grav.Poisson_solver.Copy_Output(Grav.F.potential_h); #ifdef TIME_SOR - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA MPI_Barrier(world); + #endif + time_end = Get_Time(); + time = (time_end - time_start); + chprintf(" SOR: Time = %f seg\n", time); #endif - time_end = get_time(); - time = (time_end - time_start); - chprintf( " SOR: Time = %f seg\n", time ); - #endif - - } -void Grav3D::Copy_Isolated_Boundaries_To_GPU( struct parameters *P ){ - - if ( P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3 ) return; +void Grav3D::Copy_Isolated_Boundaries_To_GPU(struct Parameters *P) +{ + if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) + return; // chprintf( " Copying Isolated Boundaries \n"); - if ( boundary_flags[0] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d, Poisson_solver.n_ghost*ny_local*nz_local ); - if ( boundary_flags[1] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d, 
Poisson_solver.n_ghost*ny_local*nz_local ); - if ( boundary_flags[2] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d, Poisson_solver.n_ghost*nx_local*nz_local ); - if ( boundary_flags[3] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d, Poisson_solver.n_ghost*nx_local*nz_local ); - if ( boundary_flags[4] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d, Poisson_solver.n_ghost*nx_local*ny_local ); - if ( boundary_flags[5] == 3 ) Copy_Isolated_Boundary_To_GPU_buffer( F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d, Poisson_solver.n_ghost*nx_local*ny_local ); - - + if (boundary_flags[0] == 3) + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_x0, Poisson_solver.F.boundary_isolated_x0_d, + Poisson_solver.n_ghost * ny_local * nz_local); + if (boundary_flags[1] == 3) + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_x1, Poisson_solver.F.boundary_isolated_x1_d, + Poisson_solver.n_ghost * ny_local * nz_local); + if (boundary_flags[2] == 3) + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_y0, Poisson_solver.F.boundary_isolated_y0_d, + Poisson_solver.n_ghost * nx_local * nz_local); + if (boundary_flags[3] == 3) + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_y1, Poisson_solver.F.boundary_isolated_y1_d, + Poisson_solver.n_ghost * nx_local * nz_local); + if (boundary_flags[4] == 3) + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_z0, Poisson_solver.F.boundary_isolated_z0_d, + Poisson_solver.n_ghost * nx_local * ny_local); + if (boundary_flags[5] == 3) + Copy_Isolated_Boundary_To_GPU_buffer(F.pot_boundary_z1, Poisson_solver.F.boundary_isolated_z1_d, + Poisson_solver.n_ghost * nx_local * ny_local); } -void Potential_SOR_3D::Set_Isolated_Boundary_Conditions( int *boundary_flags, struct parameters *P ){ - - - if ( P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 
3 && P->zl_bcnd != 3 && P->zu_bcnd != 3 ) return; - - chprintf( " Setting Isolated Boundaries \n"); - if ( boundary_flags[0] == 3 ) Set_Isolated_Boundary_GPU( 0, 0, F.boundary_isolated_x0_d ); - if ( boundary_flags[1] == 3 ) Set_Isolated_Boundary_GPU( 0, 1, F.boundary_isolated_x1_d ); - if ( boundary_flags[2] == 3 ) Set_Isolated_Boundary_GPU( 1, 0, F.boundary_isolated_y0_d ); - if ( boundary_flags[3] == 3 ) Set_Isolated_Boundary_GPU( 1, 1, F.boundary_isolated_y1_d ); - if ( boundary_flags[4] == 3 ) Set_Isolated_Boundary_GPU( 2, 0, F.boundary_isolated_z0_d ); - if ( boundary_flags[5] == 3 ) Set_Isolated_Boundary_GPU( 2, 1, F.boundary_isolated_z1_d ); - +void Potential_SOR_3D::Set_Isolated_Boundary_Conditions(int *boundary_flags, struct Parameters *P) +{ + if (P->xl_bcnd != 3 && P->xu_bcnd != 3 && P->yl_bcnd != 3 && P->yu_bcnd != 3 && P->zl_bcnd != 3 && P->zu_bcnd != 3) + return; + + chprintf(" Setting Isolated Boundaries \n"); + if (boundary_flags[0] == 3) Set_Isolated_Boundary_GPU(0, 0, F.boundary_isolated_x0_d); + if (boundary_flags[1] == 3) Set_Isolated_Boundary_GPU(0, 1, F.boundary_isolated_x1_d); + if (boundary_flags[2] == 3) Set_Isolated_Boundary_GPU(1, 0, F.boundary_isolated_y0_d); + if (boundary_flags[3] == 3) Set_Isolated_Boundary_GPU(1, 1, F.boundary_isolated_y1_d); + if (boundary_flags[4] == 3) Set_Isolated_Boundary_GPU(2, 0, F.boundary_isolated_z0_d); + if (boundary_flags[5] == 3) Set_Isolated_Boundary_GPU(2, 1, F.boundary_isolated_z1_d); } - - - -void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic( int direction, int side ){ - +void Potential_SOR_3D::Copy_Poisson_Boundary_Periodic(int direction, int side) +{ Real *boundaries_buffer; - if( direction == 0 ){ - if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_x0_d; - if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_x1_d; + if (direction == 0) { + if (side == 0) boundaries_buffer = F.boundaries_buffer_x0_d; + if (side == 1) boundaries_buffer = F.boundaries_buffer_x1_d; } - if( direction == 
1 ){ - if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_y0_d; - if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_y1_d; + if (direction == 1) { + if (side == 0) boundaries_buffer = F.boundaries_buffer_y0_d; + if (side == 1) boundaries_buffer = F.boundaries_buffer_y1_d; } - if( direction == 2 ){ - if ( side == 0 ) boundaries_buffer = F.boundaries_buffer_z0_d; - if ( side == 1 ) boundaries_buffer = F.boundaries_buffer_z1_d; + if (direction == 2) { + if (side == 0) boundaries_buffer = F.boundaries_buffer_z0_d; + if (side == 1) boundaries_buffer = F.boundaries_buffer_z1_d; } int side_load, side_unload; - side_load = side; - side_unload = ( side_load + 1 ) % 2; - - Load_Transfer_Buffer_GPU( direction, side_load, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer ); - Unload_Transfer_Buffer_GPU( direction, side_unload, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, boundaries_buffer ); + side_load = side; + side_unload = (side_load + 1) % 2; + Load_Transfer_Buffer_GPU(direction, side_load, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + boundaries_buffer); + Unload_Transfer_Buffer_GPU(direction, side_unload, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, + F.potential_d, boundaries_buffer); } - -void Potential_SOR_3D::FreeMemory_GPU( void ){ - - Free_Array_GPU_Real( F.input_d ); - Free_Array_GPU_Real( F.density_d ); - Free_Array_GPU_Real( F.potential_d ); - Free_Array_GPU_Real( F.boundaries_buffer_x0_d ); - Free_Array_GPU_Real( F.boundaries_buffer_x1_d ); - Free_Array_GPU_Real( F.boundaries_buffer_y0_d ); - Free_Array_GPU_Real( F.boundaries_buffer_y1_d ); - Free_Array_GPU_Real( F.boundaries_buffer_z0_d ); - Free_Array_GPU_Real( F.boundaries_buffer_z1_d ); +void Potential_SOR_3D::FreeMemory_GPU(void) +{ + Free_Array_GPU_Real(F.input_d); + Free_Array_GPU_Real(F.density_d); + Free_Array_GPU_Real(F.potential_d); + Free_Array_GPU_Real(F.boundaries_buffer_x0_d); + 
Free_Array_GPU_Real(F.boundaries_buffer_x1_d); + Free_Array_GPU_Real(F.boundaries_buffer_y0_d); + Free_Array_GPU_Real(F.boundaries_buffer_y1_d); + Free_Array_GPU_Real(F.boundaries_buffer_z0_d); + Free_Array_GPU_Real(F.boundaries_buffer_z1_d); #ifdef MPI_CHOLLA - Free_Array_GPU_Real( F.recv_boundaries_buffer_x0_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_x1_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_y0_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_y1_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_z0_d ); - Free_Array_GPU_Real( F.recv_boundaries_buffer_z1_d ); + Free_Array_GPU_Real(F.recv_boundaries_buffer_x0_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_x1_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_y0_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_y1_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_z0_d); + Free_Array_GPU_Real(F.recv_boundaries_buffer_z1_d); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Free_Array_GPU_Real( F.boundary_isolated_x0_d ); - Free_Array_GPU_Real( F.boundary_isolated_x1_d ); + Free_Array_GPU_Real(F.boundary_isolated_x0_d); + Free_Array_GPU_Real(F.boundary_isolated_x1_d); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Y - Free_Array_GPU_Real( F.boundary_isolated_y0_d ); - Free_Array_GPU_Real( F.boundary_isolated_y1_d ); + Free_Array_GPU_Real(F.boundary_isolated_y0_d); + Free_Array_GPU_Real(F.boundary_isolated_y1_d); #endif #ifdef GRAV_ISOLATED_BOUNDARY_Z - Free_Array_GPU_Real( F.boundary_isolated_z0_d ); - Free_Array_GPU_Real( F.boundary_isolated_z1_d ); + Free_Array_GPU_Real(F.boundary_isolated_z0_d); + Free_Array_GPU_Real(F.boundary_isolated_z1_d); #endif - } - -void Potential_SOR_3D::Reset( void ){ - free( F.output_h ); +void Potential_SOR_3D::Reset(void) +{ + free(F.output_h); FreeMemory_GPU(); } + #ifdef MPI_CHOLLA - -#ifdef MPI_CHOLLA - -int Grid3D::Load_Poisson_Boundary_To_Buffer( int direction, int side, Real *buffer_host ){ - +int Grid3D::Load_Poisson_Boundary_To_Buffer(int direction, int side, 
Real *buffer_host) +{ int size_buffer; - if ( direction == 0 ) size_buffer = Grav.Poisson_solver.size_buffer_x; - if ( direction == 1 ) size_buffer = Grav.Poisson_solver.size_buffer_y; - if ( direction == 2 ) size_buffer = Grav.Poisson_solver.size_buffer_z; - + if (direction == 0) size_buffer = Grav.Poisson_solver.size_buffer_x; + if (direction == 1) size_buffer = Grav.Poisson_solver.size_buffer_y; + if (direction == 2) size_buffer = Grav.Poisson_solver.size_buffer_z; - //Load the transfer buffer in the GPU - if ( direction == 0 ){ - if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x0(); - if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x1(); + // Load the transfer buffer in the GPU + if (direction == 0) { + if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x0(); + if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_x1(); } - if ( direction == 1 ){ - if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y0(); - if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y1(); + if (direction == 1) { + if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y0(); + if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_y1(); } - if ( direction == 2 ){ - if ( side == 0 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z0(); - if ( side == 1 ) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z1(); + if (direction == 2) { + if (side == 0) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z0(); + if (side == 1) Grav.Poisson_solver.Load_Transfer_Buffer_GPU_z1(); } // Copy the device_buffer to the host_buffer Real *buffer_dev; - if ( direction == 0 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x1_d; + if (direction == 0) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_x1_d; } - if ( direction == 1 ){ - if ( side == 0 
) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y1_d; + if (direction == 1) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_y1_d; } - if ( direction == 2 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d; + if (direction == 2) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.boundaries_buffer_z1_d; } - Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host( size_buffer, buffer_host, buffer_dev ); - + Grav.Poisson_solver.Copy_Transfer_Buffer_To_Host(size_buffer, buffer_host, buffer_dev); return size_buffer; } - -void Grid3D::Unload_Poisson_Boundary_From_Buffer( int direction, int side, Real *buffer_host ){ - +void Grid3D::Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host) +{ int size_buffer; - if ( direction == 0 ) size_buffer = Grav.Poisson_solver.size_buffer_x; - if ( direction == 1 ) size_buffer = Grav.Poisson_solver.size_buffer_y; - if ( direction == 2 ) size_buffer = Grav.Poisson_solver.size_buffer_z; - + if (direction == 0) size_buffer = Grav.Poisson_solver.size_buffer_x; + if (direction == 1) size_buffer = Grav.Poisson_solver.size_buffer_y; + if (direction == 2) size_buffer = Grav.Poisson_solver.size_buffer_z; // Copy the host_buffer to the device_buffer Real *buffer_dev; - if ( direction == 0 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d; + if (direction == 0) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_x1_d; } - if ( direction == 1 ){ - 
if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d; + if (direction == 1) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_y1_d; } - if ( direction == 2 ){ - if ( side == 0 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d; - if ( side == 1 ) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d; + if (direction == 2) { + if (side == 0) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z0_d; + if (side == 1) buffer_dev = Grav.Poisson_solver.F.recv_boundaries_buffer_z1_d; } - Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device( size_buffer, buffer_host, buffer_dev ); + Grav.Poisson_solver.Copy_Transfer_Buffer_To_Device(size_buffer, buffer_host, buffer_dev); - //Unload the transfer buffer in the GPU - if ( direction == 0 ){ - if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x0(); - if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x1(); + // Unload the transfer buffer in the GPU + if (direction == 0) { + if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x0(); + if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_x1(); } - if ( direction == 1 ){ - if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y0(); - if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y1(); + if (direction == 1) { + if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y0(); + if (side == 1) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_y1(); } - if ( direction == 2 ){ - if ( side == 0 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z0(); - if ( side == 1 ) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z1(); + if (direction == 2) { + if (side == 0) Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z0(); + if (side == 1) 
Grav.Poisson_solver.Unload_Transfer_Buffer_GPU_z1(); } - } - - -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d ); - #else - Load_Transfer_Buffer_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x0_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x0_d); + #else + Load_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x0_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d ); - #else - Load_Transfer_Buffer_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_x1_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_x1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x1_d); + #else + Load_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_x1_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d ); - #else - Load_Transfer_Buffer_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y0_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y0() 
+{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y0_d); + #else + Load_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y0_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d ); - #else - Load_Transfer_Buffer_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_y1_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_y1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y1_d); + #else + Load_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_y1_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d ); - #else - Load_Transfer_Buffer_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z0_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z0_d); + #else + Load_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z0_d); + #endif } -void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1(){ - #ifdef HALF_SIZE_BOUNDARIES - Load_Transfer_Buffer_Half_GPU( 2, 1, nx_local, ny_local, nz_local, 
n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d ); - #else - Load_Transfer_Buffer_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.boundaries_buffer_z1_d ); - #endif +void Potential_SOR_3D::Load_Transfer_Buffer_GPU_z1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Load_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z1_d); + #else + Load_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.boundaries_buffer_z1_d); + #endif } - -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d ); - #else - Unload_Transfer_Buffer_GPU( 0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x0_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x0_d); + #else + Unload_Transfer_Buffer_GPU(0, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x0_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d ); - #else - Unload_Transfer_Buffer_GPU( 0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_x1_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_x1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + 
F.recv_boundaries_buffer_x1_d); + #else + Unload_Transfer_Buffer_GPU(0, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_x1_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d ); - #else - Unload_Transfer_Buffer_GPU( 1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y0_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y0_d); + #else + Unload_Transfer_Buffer_GPU(1, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y0_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d ); - #else - Unload_Transfer_Buffer_GPU( 1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_y1_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_y1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y1_d); + #else + Unload_Transfer_Buffer_GPU(1, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_y1_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d ); - 
#else - Unload_Transfer_Buffer_GPU( 2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z0_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z0() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z0_d); + #else + Unload_Transfer_Buffer_GPU(2, 0, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z0_d); + #endif } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1(){ - #ifdef HALF_SIZE_BOUNDARIES - Unload_Transfer_Buffer_Half_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d ); - #else - Unload_Transfer_Buffer_GPU( 2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, F.recv_boundaries_buffer_z1_d ); - #endif +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU_z1() +{ + #ifdef HALF_SIZE_BOUNDARIES + Unload_Transfer_Buffer_Half_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z1_d); + #else + Unload_Transfer_Buffer_GPU(2, 1, nx_local, ny_local, nz_local, n_ghost_transfer, n_ghost, F.potential_d, + F.recv_boundaries_buffer_z1_d); + #endif } - - -bool Potential_SOR_3D::Get_Global_Converged( bool converged_local ){ - - int in = (int) converged_local; +bool Potential_SOR_3D::Get_Global_Converged(bool converged_local) +{ + int in = (int)converged_local; int out; bool y; - MPI_Allreduce( &in, &out, 1, MPI_INT, MPI_MIN, world); - y = (bool) out; + MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_MIN, world); + y = (bool)out; return y; - } -#endif - - - - + #endif -#endif //GRAVITY +#endif // GRAVITY diff --git a/src/gravity/potential_SOR_3D.h b/src/gravity/potential_SOR_3D.h index d5064b35c..2b7c71abc 100644 --- a/src/gravity/potential_SOR_3D.h +++ b/src/gravity/potential_SOR_3D.h @@ -1,17 +1,18 @@ #if 
defined(GRAVITY) && defined(SOR) -#ifndef POTENTIAL_SOR_3D_H -#define POTENTIAL_SOR_3D_H + #ifndef POTENTIAL_SOR_3D_H + #define POTENTIAL_SOR_3D_H -#include "../global/global.h" -#include + #include + + #include "../global/global.h" // #define TIME_SOR // #define HALF_SIZE_BOUNDARIES -class Potential_SOR_3D{ - public: - +class Potential_SOR_3D +{ + public: Real Lbox_x; Real Lbox_y; Real Lbox_z; @@ -38,7 +39,6 @@ class Potential_SOR_3D{ grav_int_t n_cells_potential; grav_int_t n_cells_total; - int n_ghost_transfer; int size_buffer_x; int size_buffer_y; @@ -50,90 +50,97 @@ class Potential_SOR_3D{ bool potential_initialized; - struct Fields - { - - Real *output_h; - - Real *input_d; - // Real *output_d; - Real *density_d; - Real *potential_d; - - bool *converged_d; - - bool *converged_h; - - Real *boundaries_buffer_x0_d; - Real *boundaries_buffer_x1_d; - Real *boundaries_buffer_y0_d; - Real *boundaries_buffer_y1_d; - Real *boundaries_buffer_z0_d; - Real *boundaries_buffer_z1_d; - - - Real *boundary_isolated_x0_d; - Real *boundary_isolated_x1_d; - Real *boundary_isolated_y0_d; - Real *boundary_isolated_y1_d; - Real *boundary_isolated_z0_d; - Real *boundary_isolated_z1_d; - - #ifdef MPI_CHOLLA - Real *recv_boundaries_buffer_x0_d; - Real *recv_boundaries_buffer_x1_d; - Real *recv_boundaries_buffer_y0_d; - Real *recv_boundaries_buffer_y1_d; - Real *recv_boundaries_buffer_z0_d; - Real *recv_boundaries_buffer_z1_d; - #endif + struct Fields { + Real *output_h; + + Real *input_d; + // Real *output_d; + Real *density_d; + Real *potential_d; + + bool *converged_d; + + bool *converged_h; + + Real *boundaries_buffer_x0_d; + Real *boundaries_buffer_x1_d; + Real *boundaries_buffer_y0_d; + Real *boundaries_buffer_y1_d; + Real *boundaries_buffer_z0_d; + Real *boundaries_buffer_z1_d; + + Real *boundary_isolated_x0_d; + Real *boundary_isolated_x1_d; + Real *boundary_isolated_y0_d; + Real *boundary_isolated_y1_d; + Real *boundary_isolated_z0_d; + Real *boundary_isolated_z1_d; + + #ifdef 
MPI_CHOLLA + Real *recv_boundaries_buffer_x0_d; + Real *recv_boundaries_buffer_x1_d; + Real *recv_boundaries_buffer_y0_d; + Real *recv_boundaries_buffer_y1_d; + Real *recv_boundaries_buffer_z0_d; + Real *recv_boundaries_buffer_z1_d; + #endif } F; - Potential_SOR_3D( void ); - - void Initialize( Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, Real dx, Real dy, Real dz ); - - void AllocateMemory_CPU( void ); - void AllocateMemory_GPU( void ); - void FreeMemory_GPU( void ); - void Reset( void ); - void Copy_Input( int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, Real current_a ); - - void Copy_Output( Real *output_potential ); - void Copy_Potential_From_Host( Real *output_potential ); - - - void Set_Boundaries( ); - // Real Get_Potential( Real *input_density, Real *output_potential, Real Grav_Constant, Real dens_avrg, Real current_a ); - // void Copy_Potential_From_Host( Real *potential_host ); - - void Allocate_Array_GPU_Real( Real **array_dev, grav_int_t size ); - void Allocate_Array_GPU_bool( bool **array_dev, grav_int_t size ); - void Free_Array_GPU_Real( Real * array_dev ); - void Free_Array_GPU_bool( bool * array_dev ); - - - - void Initialize_Potential( int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d ); - void Copy_Input_And_Initialize( Real *input_density, const Real *input_potential, Real Grav_Constant, Real dens_avrg, Real current_a ); - - void Poisson_iteration( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ); - void Poisson_iteration_Patial_1( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ); - void Poisson_iteration_Patial_2( int 
n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ); - void Poisson_Partial_Iteration( int n_step, Real omega, Real epsilon ); - - - void Load_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); - void Load_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); + Potential_SOR_3D(void); + + void Initialize(Real Lx, Real Ly, Real Lz, Real x_min, Real y_min, Real z_min, int nx, int ny, int nz, int nx_real, + int ny_real, int nz_real, Real dx, Real dy, Real dz); + + void AllocateMemory_CPU(void); + void AllocateMemory_GPU(void); + void FreeMemory_GPU(void); + void Reset(void); + void Copy_Input(int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, + Real current_a); + + void Copy_Output(Real *output_potential); + void Copy_Potential_From_Host(Real *output_potential); + + void Set_Boundaries(); + // Real Get_Potential( Real *input_density, Real *output_potential, Real + // Grav_Constant, Real dens_avrg, Real current_a ); void + // Copy_Potential_From_Host( Real *potential_host ); + + void Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size); + void Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size); + void Free_Array_GPU_Real(Real *array_dev); + void Free_Array_GPU_bool(bool *array_dev); + + void Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d); + void Copy_Input_And_Initialize(Real *input_density, const Real *input_potential, Real Grav_Constant, Real dens_avrg, + Real current_a); + + void Poisson_iteration(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, 
Real *density_d, Real *potential_d, bool *converged_h, + bool *converged_d); + void Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, + bool *converged_d); + void Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, + Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, + bool *converged_d); + void Poisson_Partial_Iteration(int n_step, Real omega, Real epsilon); + + void Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); + void Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); void Load_Transfer_Buffer_GPU_x0(); void Load_Transfer_Buffer_GPU_x1(); void Load_Transfer_Buffer_GPU_y0(); void Load_Transfer_Buffer_GPU_y1(); void Load_Transfer_Buffer_GPU_z0(); void Load_Transfer_Buffer_GPU_z1(); - void Unload_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); - void Unload_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ); + void Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); + void Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d); void Unload_Transfer_Buffer_GPU_x0(); void Unload_Transfer_Buffer_GPU_x1(); void Unload_Transfer_Buffer_GPU_y0(); @@ 
-141,27 +148,23 @@ class Potential_SOR_3D{ void Unload_Transfer_Buffer_GPU_z0(); void Unload_Transfer_Buffer_GPU_z1(); - void Copy_Poisson_Boundary_Periodic( int direction, int side ); + void Copy_Poisson_Boundary_Periodic(int direction, int side); - void Copy_Poisson_Boundary_Open( int direction, int side ); + void Copy_Poisson_Boundary_Open(int direction, int side); // void Load_Transfer_Buffer_GPU_All(); // void Unload_Transfer_Buffer_GPU_All(); - void Copy_Transfer_Buffer_To_Host( int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d ); - void Copy_Transfer_Buffer_To_Device( int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d ); - - void Set_Isolated_Boundary_Conditions( int *boundary_flags, struct parameters *P ); - void Set_Isolated_Boundary_GPU( int direction, int side, Real *boundary_d ); + void Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d); + void Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_bufer_h, Real *transfer_buffer_d); + void Set_Isolated_Boundary_Conditions(int *boundary_flags, struct Parameters *P); + void Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d); - #ifdef MPI_CHOLLA - bool Get_Global_Converged( bool converged_local ); - #endif + #ifdef MPI_CHOLLA + bool Get_Global_Converged(bool converged_local); + #endif }; - - - -#endif //POTENTIAL_SOR_H -#endif //GRAVITY + #endif // POTENTIAL_SOR_H +#endif // GRAVITY diff --git a/src/gravity/potential_SOR_3D_gpu.cu b/src/gravity/potential_SOR_3D_gpu.cu index 47d680077..d2066edb8 100644 --- a/src/gravity/potential_SOR_3D_gpu.cu +++ b/src/gravity/potential_SOR_3D_gpu.cu @@ -1,74 +1,72 @@ #if defined(CUDA) && defined(GRAVITY) && defined(SOR) -#include "../gravity/potential_SOR_3D.h" -#include "../global/global_cuda.h" -#include "../io/io.h" + #include "../global/global_cuda.h" + #include "../gravity/potential_SOR_3D.h" + #include "../io/io.h" + #define TPB_SOR 1024 -#define TPB_SOR 1024 - - -void 
Potential_SOR_3D::Allocate_Array_GPU_Real( Real **array_dev, grav_int_t size ){ - cudaMalloc( (void**)array_dev, size*sizeof(Real)); - CudaCheckError(); +void Potential_SOR_3D::Allocate_Array_GPU_Real(Real **array_dev, grav_int_t size) +{ + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); } -void Potential_SOR_3D::Allocate_Array_GPU_bool( bool **array_dev, grav_int_t size ){ - cudaMalloc( (void**)array_dev, size*sizeof(bool)); - CudaCheckError(); +void Potential_SOR_3D::Allocate_Array_GPU_bool(bool **array_dev, grav_int_t size) +{ + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(bool))); } -void Potential_SOR_3D::Free_Array_GPU_Real( Real *array_dev ){ - cudaFree( array_dev ); - CudaCheckError(); -} - -void Potential_SOR_3D::Free_Array_GPU_bool( bool *array_dev ){ - cudaFree( array_dev ); - CudaCheckError(); -} +void Potential_SOR_3D::Free_Array_GPU_Real(Real *array_dev) { GPU_Error_Check(cudaFree(array_dev)); } -__global__ void Copy_Input_Kernel( int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, Real current_a ){ +void Potential_SOR_3D::Free_Array_GPU_bool(bool *array_dev) { GPU_Error_Check(cudaFree(array_dev)); } +__global__ void Copy_Input_Kernel(int n_cells, Real *input_d, Real *density_d, Real Grav_Constant, Real dens_avrg, + Real current_a) +{ int tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_cells ) return; + if (tid >= n_cells) return; #ifdef COSMOLOGY - density_d[tid] = 4 * M_PI * Grav_Constant * ( input_d[tid] - dens_avrg ) / current_a; + density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg) / current_a; #else - density_d[tid] = 4 * M_PI * Grav_Constant * ( input_d[tid] - dens_avrg ); + density_d[tid] = 4 * M_PI * Grav_Constant * (input_d[tid] - dens_avrg); #endif // if (tid == 0) printf("dens: %f\n", density_d[tid]); } - -void Potential_SOR_3D::Copy_Input( int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, Real current_a 
){ - cudaMemcpy( input_d, input_density_h, n_cells*sizeof(Real), cudaMemcpyHostToDevice ); +void Potential_SOR_3D::Copy_Input(int n_cells, Real *input_d, Real *input_density_h, Real Grav_Constant, Real dens_avrg, + Real current_a) +{ + cudaMemcpy(input_d, input_density_h, n_cells * sizeof(Real), cudaMemcpyHostToDevice); // set values for GPU kernels - int ngrid = (n_cells_local + TPB_SOR - 1) / TPB_SOR; + int ngrid = (n_cells_local + TPB_SOR - 1) / TPB_SOR; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - // Copy_Input_Kernel<<>>( n_cells_local, F.input_d, F.density_d, Grav_Constant, dens_avrg, current_a ); - hipLaunchKernelGGL( Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_cells_local, F.input_d, F.density_d, Grav_Constant, dens_avrg, current_a ); - + // Copy_Input_Kernel<<>>( n_cells_local, F.input_d, + // F.density_d, Grav_Constant, dens_avrg, current_a ); + hipLaunchKernelGGL(Copy_Input_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_cells_local, F.input_d, F.density_d, + Grav_Constant, dens_avrg, current_a); } -void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer( Real *isolated_boundary_h, Real *isolated_boundary_d, int boundary_size ){ - cudaMemcpy( isolated_boundary_d, isolated_boundary_h, boundary_size*sizeof(Real), cudaMemcpyHostToDevice ); +void Grav3D::Copy_Isolated_Boundary_To_GPU_buffer(Real *isolated_boundary_h, Real *isolated_boundary_d, + int boundary_size) +{ + cudaMemcpy(isolated_boundary_d, isolated_boundary_h, boundary_size * sizeof(Real), cudaMemcpyHostToDevice); } -__global__ void Initialize_Potential_Kernel( Real init_val, Real *potential_d, Real *density_d, int nx, int ny, int nz, int n_ghost ){ - +__global__ void Initialize_Potential_Kernel(Real init_val, Real *potential_d, Real *density_d, int nx, int ny, int nz, + int n_ghost) +{ int tid_x, tid_y, tid_z, tid_pot; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = 
blockIdx.z * blockDim.z + threadIdx.z; - if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return; // tid = tid_x + tid_y*nx + tid_z*nx*ny; @@ -77,88 +75,89 @@ __global__ void Initialize_Potential_Kernel( Real init_val, Real *potential_d, R tid_z += n_ghost; int nx_pot, ny_pot; - nx_pot = nx + 2*n_ghost; - ny_pot = ny + 2*n_ghost; + nx_pot = nx + 2 * n_ghost; + ny_pot = ny + 2 * n_ghost; - - tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; potential_d[tid_pot] = init_val; - //if ( potential_d[tid_pot] !=1 ) printf("Error phi value: %f\n", potential_d[tid_pot] ); - + // if ( potential_d[tid_pot] !=1 ) printf("Error phi value: %f\n", + // potential_d[tid_pot] ); // Real dens = density_d[tid]; // potential_d[tid_pot] = -dens; - } - - -void Potential_SOR_3D::Initialize_Potential( int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, Real *density_d ){ +void Potential_SOR_3D::Initialize_Potential(int nx, int ny, int nz, int n_ghost_potential, Real *potential_d, + Real *density_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; // number of blocks per 1D grid dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - // Initialize_Potential_Kernel<<>>( 1, potential_d, density_d, nx, ny, nz, n_ghost_potential ); - hipLaunchKernelGGL( Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, 1, potential_d, density_d, nx, ny, nz, n_ghost_potential ); - + // Initialize_Potential_Kernel<<>>( 1, potential_d, + // 
density_d, nx, ny, nz, n_ghost_potential ); + hipLaunchKernelGGL(Initialize_Potential_Kernel, dim3dGrid, dim3dBlock, 0, 0, 1, potential_d, density_d, nx, ny, nz, + n_ghost_potential); } - -__global__ void Iteration_Step_SOR( int n_cells, Real *density_d, Real *potential_d, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real omega, int parity, Real epsilon, bool *converged_d ){ - +__global__ void Iteration_Step_SOR(int n_cells, Real *density_d, Real *potential_d, int nx, int ny, int nz, int n_ghost, + Real dx, Real dy, Real dz, Real omega, int parity, Real epsilon, bool *converged_d) +{ int tid_x, tid_y, tid_z, tid, tid_pot; - tid_x = 2*( blockIdx.x * blockDim.x + threadIdx.x ); + tid_x = 2 * (blockIdx.x * blockDim.x + threadIdx.x); tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * blockDim.z + threadIdx.z; // Make a checkboard 3D grid - if ( tid_y%2 == 0 ){ - if ( tid_z%2 == parity ) tid_x +=1; - } - else if ( (tid_z+1)%2 == parity ) tid_x +=1; + if (tid_y % 2 == 0) { + if (tid_z % 2 == parity) tid_x += 1; + } else if ((tid_z + 1) % 2 == parity) + tid_x += 1; - if (tid_x >= nx || tid_y >= ny || tid_z >= nz ) return; + if (tid_x >= nx || tid_y >= ny || tid_z >= nz) return; int nx_pot, ny_pot; - nx_pot = nx + 2*n_ghost; - ny_pot = ny + 2*n_ghost; + nx_pot = nx + 2 * n_ghost; + ny_pot = ny + 2 * n_ghost; // nz_pot = nz + 2*n_ghost; - tid = tid_x + tid_y*nx + tid_z*nx*ny; + tid = tid_x + tid_y * nx + tid_z * nx * ny; tid_x += n_ghost; tid_y += n_ghost; tid_z += n_ghost; - tid_pot = tid_x + tid_y*nx_pot + tid_z*nx_pot*ny_pot; + tid_pot = tid_x + tid_y * nx_pot + tid_z * nx_pot * ny_pot; // //Set neighbors ids int indx_l, indx_r, indx_d, indx_u, indx_b, indx_t; - indx_l = tid_x-1; //Left - indx_r = tid_x+1; //Right - indx_d = tid_y-1; //Down - indx_u = tid_y+1; //Up - indx_b = tid_z-1; //Bottom - indx_t = tid_z+1; //Top + indx_l = tid_x - 1; // Left + indx_r = tid_x + 1; // Right + indx_d = tid_y - 1; // Down + indx_u = tid_y + 1; // Up 
+ indx_b = tid_z - 1; // Bottom + indx_t = tid_z + 1; // Top - //Boundary Conditions are loaded to the potential array, the natural indices work! + // Boundary Conditions are loaded to the potential array, the natural indices + // work! // //Periodic Boundary conditions // indx_l = tid_x == n_ghost ? nx_pot-n_ghost-1 : tid_x-1; //Left - // indx_r = tid_x == nx_pot-n_ghost-1 ? n_ghost : tid_x+1; //Right + // indx_r = tid_x == nx_pot-n_ghost-1 ? n_ghost : tid_x+1; //Right // indx_d = tid_y == n_ghost ? ny_pot-n_ghost-1 : tid_y-1; //Down // indx_u = tid_y == ny_pot-n_ghost-1 ? n_ghost : tid_y+1; //Up - // indx_b = tid_z == n_ghost ? nz_pot-n_ghost-1 : tid_z-1; //Bottom - // indx_t = tid_z == nz_pot-n_ghost-1 ? n_ghost : tid_z+1; //Top + // indx_b = tid_z == n_ghost ? nz_pot-n_ghost-1 : tid_z-1; + // //Bottom indx_t = tid_z == nz_pot-n_ghost-1 ? n_ghost : + // tid_z+1; //Top // // //Zero Gradient Boundary conditions // indx_l = tid_x == n_ghost ? tid_x+1 : tid_x-1; //Left @@ -168,163 +167,174 @@ __global__ void Iteration_Step_SOR( int n_cells, Real *density_d, Real *potentia // indx_b = tid_z == n_ghost ? tid_z+1 : tid_z-1; //Bottom // indx_t = tid_z == nz_pot-n_ghost-1 ? 
tid_z-1 : tid_z+1; //Top - - Real rho, phi_c, phi_l, phi_r, phi_d, phi_u, phi_b, phi_t, phi_new; - rho = density_d[tid]; + rho = density_d[tid]; phi_c = potential_d[tid_pot]; - phi_l = potential_d[ indx_l + tid_y*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_r = potential_d[ indx_r + tid_y*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_d = potential_d[ tid_x + indx_d*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_u = potential_d[ tid_x + indx_u*nx_pot + tid_z*nx_pot*ny_pot ]; - phi_b = potential_d[ tid_x + tid_y*nx_pot + indx_b*nx_pot*ny_pot ]; - phi_t = potential_d[ tid_x + tid_y*nx_pot + indx_t*nx_pot*ny_pot ]; - - phi_new = (1-omega)*phi_c + omega/6*( phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx*dx*rho ); + phi_l = potential_d[indx_l + tid_y * nx_pot + tid_z * nx_pot * ny_pot]; + phi_r = potential_d[indx_r + tid_y * nx_pot + tid_z * nx_pot * ny_pot]; + phi_d = potential_d[tid_x + indx_d * nx_pot + tid_z * nx_pot * ny_pot]; + phi_u = potential_d[tid_x + indx_u * nx_pot + tid_z * nx_pot * ny_pot]; + phi_b = potential_d[tid_x + tid_y * nx_pot + indx_b * nx_pot * ny_pot]; + phi_t = potential_d[tid_x + tid_y * nx_pot + indx_t * nx_pot * ny_pot]; + + phi_new = (1 - omega) * phi_c + omega / 6 * (phi_l + phi_r + phi_d + phi_u + phi_b + phi_t - dx * dx * rho); potential_d[tid_pot] = phi_new; // potential_d[tid_pot] = parity + 1; - //Check the residual for the convergence criteria - if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) converged_d[0] = 0; - // if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) printf("%f\n", fabs( ( phi_new - phi_c ) / phi_c) ); - // if ( ( fabs( ( phi_new - phi_c ) ) > epsilon ) ) converged_d[0] = 0; - - - - + // Check the residual for the convergence criteria + if ((fabs((phi_new - phi_c) / phi_c) > epsilon)) converged_d[0] = 0; + // if ( ( fabs( ( phi_new - phi_c ) / phi_c ) > epsilon ) ) printf("%f\n", + // fabs( ( phi_new - phi_c ) / phi_c) ); if ( ( fabs( ( phi_new - phi_c ) ) > + // epsilon ) ) converged_d[0] = 0; } -void 
Potential_SOR_3D::Poisson_iteration( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){ - +void Potential_SOR_3D::Poisson_iteration(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, + Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, + bool *converged_h, bool *converged_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; - int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x; // number of blocks per 1D grid dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z); dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - cudaMemset( converged_d, 1, sizeof(bool) ); - - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); + cudaMemset(converged_d, 1, sizeof(bool)); - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); + // Iteration_Step_SOR<<>>( n_cells, density_d, + // 
potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d); - cudaMemcpy( converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost ); + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d); + cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost); } - -void Potential_SOR_3D::Poisson_iteration_Patial_1( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){ - +void Potential_SOR_3D::Poisson_iteration_Patial_1(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, bool *converged_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; - int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x; // number of blocks per 1D grid dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z); dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, 
tpb_z); - cudaMemset( converged_d, 1, sizeof(bool) ); - - // Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d ); + cudaMemset(converged_d, 1, sizeof(bool)); + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 0, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 0, epsilon, converged_d); } - -void Potential_SOR_3D::Poisson_iteration_Patial_2( int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, Real dy, Real dz, Real omega, Real epsilon, Real *density_d, Real *potential_d, bool *converged_h, bool *converged_d ){ - +void Potential_SOR_3D::Poisson_iteration_Patial_2(int n_cells, int nx, int ny, int nz, int n_ghost_potential, Real dx, + Real dy, Real dz, Real omega, Real epsilon, Real *density_d, + Real *potential_d, bool *converged_h, bool *converged_d) +{ // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; - int ngrid_x_half = ( nx_local/2 + tpb_x - 1) / tpb_x; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_local + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_local + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_local + tpb_z - 1) / tpb_z; + int ngrid_x_half = (nx_local / 2 + tpb_x - 1) / tpb_x; // number of blocks per 1D grid dim3 dim3dGrid_half(ngrid_x_half, ngrid_y, ngrid_z); dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - // 
Iteration_Step_SOR<<>>( n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - hipLaunchKernelGGL( Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d ); - - cudaMemcpy( converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost ); + // Iteration_Step_SOR<<>>( n_cells, density_d, + // potential_d, nx, ny, nz, n_ghost_potential, dx, dy, dz, omega, 1, epsilon, + // converged_d ); + hipLaunchKernelGGL(Iteration_Step_SOR, dim3dGrid_half, dim3dBlock, 0, 0, n_cells, density_d, potential_d, nx, ny, nz, + n_ghost_potential, dx, dy, dz, omega, 1, epsilon, converged_d); + cudaMemcpy(converged_h, converged_d, sizeof(bool), cudaMemcpyDeviceToHost); } - -__global__ void Set_Isolated_Boundary_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int n_ghost, int nx_pot, int ny_pot, int nz_pot, Real *potential_d, Real *boundary_d ){ - +__global__ void Set_Isolated_Boundary_GPU_kernel(int direction, int side, int size_buffer, int n_i, int n_j, + int n_ghost, int nx_pot, int ny_pot, int nz_pot, Real *potential_d, + Real *boundary_d) +{ // get a global thread ID int nx_local, ny_local, nz_local; - nx_local = nx_pot - 2*n_ghost; - ny_local = ny_pot - 2*n_ghost; - nz_local = nz_pot - 2*n_ghost; + nx_local = nx_pot - 2 * n_ghost; + ny_local = ny_pot - 2 * n_ghost; + nz_local = nz_pot - 2 * n_ghost; int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; + if (tid_i < 0 || tid_i >= n_i 
|| tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) return; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = (tid_k) + (tid_i+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_k+nx_local+n_ghost) + (tid_i+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; + if (direction == 0) { + if (side == 0) tid_pot = (tid_k) + (tid_i + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_k + nx_local + n_ghost) + (tid_i + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_k)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_k+ny_local+n_ghost)*nx_pot + (tid_j+n_ghost)*nx_pot*ny_pot; + if (direction == 1) { + if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_k)*nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i + n_ghost) + (tid_k + ny_local + n_ghost) * nx_pot + (tid_j + n_ghost) * nx_pot * ny_pot; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx_pot + (tid_k)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i+n_ghost) + (tid_j+n_ghost)*nx_pot + (tid_k+nz_local+n_ghost)*nx_pot*ny_pot; + if (direction == 2) { + if (side == 0) tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + (tid_k)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i + n_ghost) + (tid_j + n_ghost) * nx_pot + (tid_k + nz_local + n_ghost) * nx_pot * ny_pot; } potential_d[tid_pot] = boundary_d[tid_buffer]; - } -void Potential_SOR_3D::Set_Isolated_Boundary_GPU( int direction, int side, Real *boundary_d ){ - +void Potential_SOR_3D::Set_Isolated_Boundary_GPU(int direction, int side, Real *boundary_d) +{ // #ifdef MPI_CHOLLA - // printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction, side ); - // #endif + 
// printf("Pid: %d Setting Isolated Boundary: %d %d \n",procID, direction, + // side ); #endif // int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; - nx_pot = nx_local + 2*n_ghost; - ny_pot = ny_local + 2*n_ghost; - nz_pot = nz_local + 2*n_ghost; + nx_pot = nx_local + 2 * n_ghost; + ny_pot = ny_local + 2 * n_ghost; + nz_pot = nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_local; n_j = nz_local; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_local; n_j = nz_local; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_local; n_j = ny_local; } @@ -332,74 +342,76 @@ void Potential_SOR_3D::Set_Isolated_Boundary_GPU( int direction, int side, Rea size_buffer = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - // Set_Isolated_Boundary_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d ); - hipLaunchKernelGGL( Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d ); - + // Set_Isolated_Boundary_GPU_kernel<<>>( direction, + // side, size_buffer, n_i, n_j, n_ghost, nx_pot, ny_pot, nz_pot, + // F.potential_d, boundary_d ); + hipLaunchKernelGGL(Set_Isolated_Boundary_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, + n_j, n_ghost, nx_pot, ny_pot, nz_pot, F.potential_d, boundary_d); } - - -void Potential_SOR_3D::Copy_Output( Real *output_potential ){ - cudaMemcpy( output_potential, F.potential_d, n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost ); +void Potential_SOR_3D::Copy_Output(Real *output_potential) +{ + cudaMemcpy(output_potential, F.potential_d, n_cells_potential * sizeof(Real), 
cudaMemcpyDeviceToHost); } -void Potential_SOR_3D::Copy_Potential_From_Host( Real *output_potential ){ - cudaMemcpy( F.potential_d, output_potential, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice ); +void Potential_SOR_3D::Copy_Potential_From_Host(Real *output_potential) +{ + cudaMemcpy(F.potential_d, output_potential, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice); } - - -__global__ void Load_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Load_Transfer_Buffer_GPU_kernel_SOR(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (nx - n_ghost_potential - n_ghost_transfer + 
tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential + tid_k) * nx * ny; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; } transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; - } -__global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d, int parity ){ - +__global__ void Load_Transfer_Buffer_GPU_Half_kernel(int direction, int side, int size_buffer, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost_transfer, int n_ghost_potential, + Real *potential_d, Real *transfer_buffer_d, int parity) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i 
* n_j; int nx_pot, ny_pot, nz_pot; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; - + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; // // Make a checkboard 3D grid // tid_i = 2 * tid_i; @@ -408,78 +420,80 @@ __global__ void Load_Transfer_Buffer_GPU_Half_kernel( int direction, int side, i // } // else if ( (tid_k+1)%2 == parity ) tid_i +=1; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_i += n_ghost_potential; tid_j += n_ghost_potential; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = ( nx_pot - n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 0) { + if (side == 0) tid_pot = (n_ghost_potential + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (nx_pot - n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + ( ny_pot - n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 1) { + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i) + (ny_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( n_ghost_potential + tid_k )*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( nz_pot - n_ghost_potential - 
n_ghost_transfer + tid_k )*nx_pot*ny_pot; + if (direction == 2) { + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx_pot + (n_ghost_potential + tid_k) * nx_pot * ny_pot; + if (side == 1) + tid_pot = (tid_i) + (tid_j)*nx_pot + (nz_pot - n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot; } - // printf( "Loading Buffer Half: val= %d pot= %f \n", parity+1, potential_d[tid_pot] ); + // printf( "Loading Buffer Half: val= %d pot= %f \n", parity+1, + // potential_d[tid_pot] ); transfer_buffer_d[tid_buffer] = potential_d[tid_pot]; - } - - -__global__ void Unload_Transfer_Buffer_GPU_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +__global__ void Unload_Transfer_Buffer_GPU_kernel_SOR(int direction, int side, int size_buffer, int n_i, int n_j, + int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = ( nx - n_ghost_potential + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if 
(side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (nx - n_ghost_potential + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + ( ny - n_ghost_potential + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx + (tid_j)*nx * ny; + if (side == 1) tid_pot = (tid_i) + (ny - n_ghost_potential + tid_k) * nx + (tid_j)*nx * ny; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = (tid_i) + (tid_j)*nx + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx*ny; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx + ( nz - n_ghost_potential + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) tid_pot = (tid_i) + (tid_j)*nx + (n_ghost_potential - n_ghost_transfer + tid_k) * nx * ny; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx + (nz - n_ghost_potential + tid_k) * nx * ny; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; - } - -__global__ void Unload_Transfer_Buffer_GPU_Half_kernel( int direction, int side, int size_buffer, int n_i, int n_j, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d, int parity ){ - +__global__ void Unload_Transfer_Buffer_GPU_Half_kernel(int direction, int side, int size_buffer, int n_i, int n_j, + int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d, int parity) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_pot; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i 
* n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; int nx_pot, ny_pot, nz_pot; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; // // Make a checkboard 3D grid // tid_i = 2 * tid_i; @@ -488,47 +502,45 @@ __global__ void Unload_Transfer_Buffer_GPU_Half_kernel( int direction, int side, // } // else if ( (tid_k+1)%2 == parity ) tid_i +=1; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost_transfer) return; tid_i += n_ghost_potential; tid_j += n_ghost_potential; - - if ( direction == 0 ){ - if ( side == 0 ) tid_pot = ( n_ghost_potential - n_ghost_transfer + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = ( nx_pot - n_ghost_potential + tid_k ) + (tid_i)*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 0) { + if (side == 0) tid_pot = (n_ghost_potential - n_ghost_transfer + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) tid_pot = (nx_pot - n_ghost_potential + tid_k) + (tid_i)*nx_pot + (tid_j)*nx_pot * ny_pot; } - if ( direction == 1 ){ - if ( side == 0 ) tid_pot = (tid_i) + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + ( ny_pot - n_ghost_potential + tid_k )*nx_pot + (tid_j)*nx_pot*ny_pot; + if (direction == 1) { + if (side == 0) + tid_pot = (tid_i) + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; + if (side == 1) tid_pot = (tid_i) + (ny_pot - n_ghost_potential + tid_k) * nx_pot + (tid_j)*nx_pot * ny_pot; } - if ( direction == 2 ){ - if ( side == 0 ) tid_pot = 
(tid_i) + (tid_j)*nx_pot + ( n_ghost_potential - n_ghost_transfer + tid_k )*nx_pot*ny_pot; - if ( side == 1 ) tid_pot = (tid_i) + (tid_j)*nx_pot + ( nz_pot - n_ghost_potential + tid_k )*nx_pot*ny_pot; + if (direction == 2) { + if (side == 0) + tid_pot = (tid_i) + (tid_j)*nx_pot + (n_ghost_potential - n_ghost_transfer + tid_k) * nx_pot * ny_pot; + if (side == 1) tid_pot = (tid_i) + (tid_j)*nx_pot + (nz_pot - n_ghost_potential + tid_k) * nx_pot * ny_pot; } potential_d[tid_pot] = transfer_buffer_d[tid_buffer]; - } - - -void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Load_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -536,33 +548,36 @@ void Potential_SOR_3D::Load_Transfer_Buffer_GPU( int direction, int side, int nx size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Load_Transfer_Buffer_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); 
- hipLaunchKernelGGL( Load_Transfer_Buffer_GPU_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - + // Load_Transfer_Buffer_GPU_kernel<<>>( direction, side, + // size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d ); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, + transfer_buffer_d); } - -void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, + int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) +{ int size_buffer, n_i, n_j, ngrid; - nz_pot = nz + 2*n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny; n_j = nz; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx; n_j = nz; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx; n_j = ny; } @@ -571,34 +586,37 @@ void Potential_SOR_3D::Load_Transfer_Buffer_Half_GPU( int direction, int side, i size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Load_Transfer_Buffer_GPU_Half_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, 
dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - + // Load_Transfer_Buffer_GPU_Half_kernel<<>>( direction, + // side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); + hipLaunchKernelGGL(Load_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, + iteration_parity); } -void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Unload_Transfer_Buffer_GPU(int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, + int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d) +{ int nx_pot, ny_pot, nz_pot, size_buffer, n_i, n_j, ngrid; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - nz_pot = nz + 2*n_ghost_potential; + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; + nz_pot = nz + 2 * n_ghost_potential; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_pot; n_j = nz_pot; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_pot; n_j = nz_pot; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_pot; n_j = ny_pot; } @@ -606,32 +624,35 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_GPU( int direction, int side, int size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Unload_Transfer_Buffer_GPU_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx_pot, 
ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel,dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d ); - + // Unload_Transfer_Buffer_GPU_kernel<<>>( direction, + // side, size_buffer, n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d ); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_kernel_SOR, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx_pot, ny_pot, nz_pot, n_ghost_transfer, n_ghost_potential, potential_d, + transfer_buffer_d); } - -void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( int direction, int side, int nx, int ny, int nz, int n_ghost_transfer, int n_ghost_potential, Real *potential_d, Real *transfer_buffer_d ){ - +void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU(int direction, int side, int nx, int ny, int nz, + int n_ghost_transfer, int n_ghost_potential, Real *potential_d, + Real *transfer_buffer_d) +{ int size_buffer, n_i, n_j, ngrid; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny; n_j = nz; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx; n_j = nz; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx; n_j = ny; } @@ -640,31 +661,27 @@ void Potential_SOR_3D::Unload_Transfer_Buffer_Half_GPU( int direction, int side, size_buffer = n_ghost_transfer * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_SOR + 1; + ngrid = (size_buffer - 1) / TPB_SOR + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_SOR, 1, 1); - - // Unload_Transfer_Buffer_GPU_Half_kernel<<>>( direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); - 
hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity); - + // Unload_Transfer_Buffer_GPU_Half_kernel<<>>( + // direction, side, size_buffer, n_i, n_j, nx, ny, nz, n_ghost_transfer, + // n_ghost_potential, potential_d, transfer_buffer_d, iteration_parity ); + hipLaunchKernelGGL(Unload_Transfer_Buffer_GPU_Half_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, size_buffer, + n_i, n_j, nx, ny, nz, n_ghost_transfer, n_ghost_potential, potential_d, transfer_buffer_d, + iteration_parity); } -void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host( int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d ){ - CudaSafeCall( cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer*sizeof(Real), cudaMemcpyDeviceToHost ) ); +void Potential_SOR_3D::Copy_Transfer_Buffer_To_Host(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d) +{ + GPU_Error_Check(cudaMemcpy(transfer_buffer_h, transfer_buffer_d, size_buffer * sizeof(Real), cudaMemcpyDeviceToHost)); } - -void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device( int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d ){ - CudaSafeCall( cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer*sizeof(Real), cudaMemcpyHostToDevice ) ); +void Potential_SOR_3D::Copy_Transfer_Buffer_To_Device(int size_buffer, Real *transfer_buffer_h, Real *transfer_buffer_d) +{ + GPU_Error_Check(cudaMemcpy(transfer_buffer_d, transfer_buffer_h, size_buffer * sizeof(Real), cudaMemcpyHostToDevice)); } - -#endif //GRAVITY - - - - - - +#endif // GRAVITY diff --git a/src/gravity/potential_paris_3D.cu b/src/gravity/potential_paris_3D.cu index 6c9ec503c..c3a66ae9e 100644 --- a/src/gravity/potential_paris_3D.cu +++ b/src/gravity/potential_paris_3D.cu @@ -1,79 +1,86 @@ #if defined(GRAVITY) && defined(PARIS) -#include "../gravity/potential_paris_3D.h" 
-#include "../utils/gpu.hpp" -#include "../io/io.h" -#include -#include -#include - -static void __attribute__((unused)) printDiff(const Real *p, const Real *q, const int ng, const int nx, const int ny, const int nz, const bool plot = false) + #include + #include + #include + + #include "../gravity/potential_paris_3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + +static void __attribute__((unused)) Print_Diff(const Real *p, const Real *q, const int ng, const int nx, const int ny, + const int nz, const bool plot = false) { Real dMax = 0, dSum = 0, dSum2 = 0; Real qMax = 0, qSum = 0, qSum2 = 0; -#pragma omp parallel for reduction(max:dMax,qMax) reduction(+:dSum,dSum2,qSum,qSum2) + #pragma omp parallel for reduction(max : dMax, qMax) reduction(+ : dSum, dSum2, qSum, qSum2) for (int k = 0; k < nz; k++) { for (int j = 0; j < ny; j++) { for (int i = 0; i < nx; i++) { - const int ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng)); + const int ijk = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); const Real qAbs = fabs(q[ijk]); - qMax = std::max(qMax,qAbs); + qMax = std::max(qMax, qAbs); qSum += qAbs; - qSum2 += qAbs*qAbs; - const Real d = fabs(q[ijk]-p[ijk]); - dMax = std::max(dMax,d); + qSum2 += qAbs * qAbs; + const Real d = fabs(q[ijk] - p[ijk]); + dMax = std::max(dMax, d); dSum += d; - dSum2 += d*d; + dSum2 += d * d; } } } - Real maxs[2] = {qMax,dMax}; - Real sums[4] = {qSum,qSum2,dSum,dSum2}; - MPI_Allreduce(MPI_IN_PLACE,&maxs,2,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD); - MPI_Allreduce(MPI_IN_PLACE,&sums,4,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); - chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n",sums[2]/sums[0],sqrt(sums[3]/sums[1]),maxs[1]/maxs[0]); + Real maxs[2] = {qMax, dMax}; + Real sums[4] = {qSum, qSum2, dSum, dSum2}; + MPI_Allreduce(MPI_IN_PLACE, &maxs, 2, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(MPI_IN_PLACE, &sums, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + chprintf(" Poisson-Solver Diff: L1 %g L2 %g Linf %g\n", sums[2] / 
sums[0], sqrt(sums[3] / sums[1]), + maxs[1] / maxs[0]); fflush(stdout); - if (!plot) return; + if (!plot) { + return; + } printf("###\n"); - const int k = nz/2; - //for (int j = 0; j < ny; j++) { - const int j = ny/2; - for (int i = 0; i < nx; i++) { - const int ijk = i+ng+(nx+ng+ng)*(j+ng+(ny+ng+ng)*(k+ng)); - //printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); - printf("%d %g %g %g\n",i,q[ijk],p[ijk],q[ijk]-p[ijk]); - } - printf("\n"); + const int k = nz / 2; + // for (int j = 0; j < ny; j++) { + const int j = ny / 2; + for (int i = 0; i < nx; i++) { + const int ijk = i + ng + (nx + ng + ng) * (j + ng + (ny + ng + ng) * (k + ng)); + // printf("%d %d %g %g %g\n",j,i,q[ijk],p[ijk],q[ijk]-p[ijk]); + printf("%d %g %g %g\n", i, q[ijk], p[ijk], q[ijk] - p[ijk]); + } + printf("\n"); //} MPI_Finalize(); exit(0); } -Potential_Paris_3D::Potential_Paris_3D(): - dn_{0,0,0}, - dr_{0,0,0}, - lo_{0,0,0}, - lr_{0,0,0}, - myLo_{0,0,0}, - pp_(nullptr), - minBytes_(0), - densityBytes_(0), - potentialBytes_(0), - da_(nullptr), - db_(nullptr) -{} - -Potential_Paris_3D::~Potential_Paris_3D() { Reset(); } - -void Potential_Paris_3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset, const Real a) +PotentialParis3D::PotentialParis3D() + : dn_{0, 0, 0}, + dr_{0, 0, 0}, + lo_{0, 0, 0}, + lr_{0, 0, 0}, + myLo_{0, 0, 0}, + pp_(nullptr), + minBytes_(0), + densityBytes_(0), + potentialBytes_(0), + da_(nullptr), + db_(nullptr) { -#ifdef COSMOLOGY - const Real scale = Real(4)*M_PI*g/a; -#else - const Real scale = Real(4)*M_PI*g; -#endif +} + +PotentialParis3D::~PotentialParis3D() { Reset(); } + +void PotentialParis3D::Get_Potential(const Real *const density, Real *const potential, const Real g, const Real offset, + const Real a) +{ + #ifdef COSMOLOGY + const Real scale = Real(4) * M_PI * g / a; + #else + const Real scale = Real(4) * M_PI * g; + #endif assert(da_); Real *const da = da_; Real *const db = db_; @@ -83,45 +90,47 @@ void 
Potential_Paris_3D::Get_Potential(const Real *const density, Real *const po const int nj = dn_[1]; const int nk = dn_[0]; - const int n = ni*nj*nk; + const int n = ni * nj * nk; #ifdef GRAVITY_GPU - CHECK(cudaMemcpy(db,density,densityBytes_,cudaMemcpyDeviceToDevice)); + GPU_Error_Check(cudaMemcpy(db, density, densityBytes_, cudaMemcpyDeviceToDevice)); #else - CHECK(cudaMemcpy(db,density,densityBytes_,cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(db, density, densityBytes_, cudaMemcpyHostToDevice)); #endif - const int ngi = ni+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - const int ngj = nj+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; + const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; - gpuFor(n,GPU_LAMBDA(const int i) { db[i] = scale*(db[i]-offset); }); - pp_->solve(minBytes_,db,da); gpuFor( - nk,nj,ni, - GPU_LAMBDA(const int k, const int j, const int i) { - const int ia = i+ni*(j+nj*k); - const int ib = i+N_GHOST_POTENTIAL+ngi*(j+N_GHOST_POTENTIAL+ngj*(k+N_GHOST_POTENTIAL)); - db[ib] = da[ia]; - }); + n, GPU_LAMBDA(const int i) { db[i] = scale * (db[i] - offset); }); + pp_->solve(minBytes_, db, da); + gpuFor( + nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { + const int ia = i + ni * (j + nj * k); + const int ib = i + N_GHOST_POTENTIAL + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); + db[ib] = da[ia]; + }); assert(potential); #ifdef GRAVITY_GPU - CHECK(cudaMemcpy(potential,db,potentialBytes_,cudaMemcpyDeviceToDevice)); + GPU_Error_Check(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToDevice)); #else - CHECK(cudaMemcpy(potential,db,potentialBytes_,cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaMemcpy(potential, db, potentialBytes_, cudaMemcpyDeviceToHost)); #endif } -void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, const Real zMin, const int nx, const int ny, const int nz, const int 
nxReal, const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) +void PotentialParis3D::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, const int nz, const int nxReal, + const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) { chprintf(" Using Poisson Solver: Paris Periodic"); -#ifdef PARIS_5PT + #ifdef PARIS_5PT chprintf(" 5-Point\n"); -#elif defined PARIS_3PT + #elif defined PARIS_3PT chprintf(" 3-Point\n"); -#else + #else chprintf(" Spectral\n"); -#endif + #endif - const long nl012 = long(nxReal)*long(nyReal)*long(nzReal); + const long nl012 = long(nxReal) * long(nyReal) * long(nzReal); assert(nl012 <= INT_MAX); dn_[0] = nzReal; @@ -139,43 +148,53 @@ void Potential_Paris_3D::Initialize(const Real lx, const Real ly, const Real lz, myLo_[0] = zMin; myLo_[1] = yMin; myLo_[2] = xMin; - MPI_Allreduce(myLo_,lo_,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD); - - const Real hi[3] = {lo_[0]+lz-dr_[0],lo_[1]+ly-dr_[1],lo_[2]+lx-dr_[2]}; - const int n[3] = {nz,ny,nx}; - const int m[3] = {n[0]/nzReal,n[1]/nyReal,n[2]/nxReal}; - const int id[3] = {int(round((zMin-lo_[0])/(dn_[0]*dr_[0]))),int(round((yMin-lo_[1])/(dn_[1]*dr_[1]))),int(round((xMin-lo_[2])/(dn_[2]*dr_[2])))}; - chprintf(" Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d ]\n",lo_[2],lo_[1],lo_[0],lo_[2]+lx,lo_[1]+ly,lo_[0]+lz,dn_[2],dn_[1],dn_[0],m[2],m[1],m[0]); - - assert(dn_[0] == n[0]/m[0]); - assert(dn_[1] == n[1]/m[1]); - assert(dn_[2] == n[2]/m[2]); - - pp_ = new ParisPeriodic(n,lo_,hi,m,id); + MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + + const Real hi[3] = {lo_[0] + lz - dr_[0], lo_[1] + ly - dr_[1], lo_[2] + lx - dr_[2]}; + const int n[3] = {nz, ny, nx}; + const int m[3] = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal}; + const int id[3] = {int(round((zMin - lo_[0]) / (dn_[0] * dr_[0]))), int(round((yMin - lo_[1]) / (dn_[1] * 
dr_[1]))), + int(round((xMin - lo_[2]) / (dn_[2] * dr_[2])))}; + chprintf( + " Paris: [ %g %g %g ]-[ %g %g %g ] N_local[ %d %d %d ] Tasks[ %d %d %d " + "]\n", + lo_[2], lo_[1], lo_[0], lo_[2] + lx, lo_[1] + ly, lo_[0] + lz, dn_[2], dn_[1], dn_[0], m[2], m[1], m[0]); + + assert(dn_[0] == n[0] / m[0]); + assert(dn_[1] == n[1] / m[1]); + assert(dn_[2] == n[2] / m[2]); + + pp_ = new ParisPeriodic(n, lo_, hi, m, id); assert(pp_); - minBytes_ = pp_->bytes(); - densityBytes_ = long(sizeof(Real))*dn_[0]*dn_[1]*dn_[2]; - const long gg = N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - potentialBytes_ = long(sizeof(Real))*(dn_[0]+gg)*(dn_[1]+gg)*(dn_[2]+gg); + minBytes_ = pp_->bytes(); + densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; + const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); - CHECK(cudaMalloc(reinterpret_cast(&da_),std::max(minBytes_,densityBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); assert(da_); - CHECK(cudaMalloc(reinterpret_cast(&db_),std::max(minBytes_,potentialBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, potentialBytes_))); assert(db_); } -void Potential_Paris_3D::Reset() +void PotentialParis3D::Reset() { - if (db_) CHECK(cudaFree(db_)); + if (db_) { + GPU_Error_Check(cudaFree(db_)); + } db_ = nullptr; - if (da_) CHECK(cudaFree(da_)); + if (da_) { + GPU_Error_Check(cudaFree(da_)); + } da_ = nullptr; potentialBytes_ = densityBytes_ = minBytes_ = 0; - if (pp_) delete pp_; + if (pp_) { + delete pp_; + } pp_ = nullptr; myLo_[2] = myLo_[1] = myLo_[0] = 0; diff --git a/src/gravity/potential_paris_3D.h b/src/gravity/potential_paris_3D.h index b6d85d5d2..be80c4116 100644 --- a/src/gravity/potential_paris_3D.h +++ b/src/gravity/potential_paris_3D.h @@ -2,25 +2,28 @@ #if defined(GRAVITY) && defined(PARIS) -#include "paris/ParisPeriodic.hpp" -#include "../global/global.h" + 
#include "../global/global.h" + #include "paris/ParisPeriodic.hpp" -class Potential_Paris_3D { - public: - Potential_Paris_3D(); - ~Potential_Paris_3D(); - void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a); - void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz); - void Reset(); - protected: - int dn_[3]; - Real dr_[3],lo_[3],lr_[3],myLo_[3]; - ParisPeriodic *pp_; - long minBytes_; - long densityBytes_; - long potentialBytes_; - Real *da_; - Real *db_; +class PotentialParis3D +{ + public: + PotentialParis3D(); + ~PotentialParis3D(); + void Get_Potential(const Real *density, Real *potential, Real g, Real massInfo, Real a); + void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, + int nyReal, int nzReal, Real dx, Real dy, Real dz); + void Reset(); + + protected: + int dn_[3]; + Real dr_[3], lo_[3], lr_[3], myLo_[3]; + ParisPeriodic *pp_; + long minBytes_; + long densityBytes_; + long potentialBytes_; + Real *da_; + Real *db_; }; #endif diff --git a/src/gravity/potential_paris_galactic.cu b/src/gravity/potential_paris_galactic.cu index db53ea31a..fbb38df28 100644 --- a/src/gravity/potential_paris_galactic.cu +++ b/src/gravity/potential_paris_galactic.cu @@ -1,32 +1,36 @@ #ifdef PARIS_GALACTIC -#include "../gravity/potential_paris_galactic.h" -#include "../io/io.h" -#include "../utils/gpu.hpp" -#include - -Potential_Paris_Galactic::Potential_Paris_Galactic(): - dn_{0,0,0}, - dr_{0,0,0}, - lo_{0,0,0}, - lr_{0,0,0}, - myLo_{0,0,0}, - pp_(nullptr), - densityBytes_(0), - minBytes_(0), - da_(nullptr), - db_(nullptr) -#ifndef GRAVITY_GPU - , potentialBytes_(0), - dc_(nullptr) -#endif -{} + #include + + #include "../gravity/potential_paris_galactic.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + +PotentialParisGalactic::PotentialParisGalactic() + : dn_{0, 0, 0}, + 
dr_{0, 0, 0}, + lo_{0, 0, 0}, + lr_{0, 0, 0}, + myLo_{0, 0, 0}, + pp_(nullptr), + densityBytes_(0), + minBytes_(0), + da_(nullptr), + db_(nullptr) + #ifndef GRAVITY_GPU + , + potentialBytes_(0), + dc_(nullptr) + #endif +{ +} -Potential_Paris_Galactic::~Potential_Paris_Galactic() { Reset(); } +PotentialParisGalactic::~PotentialParisGalactic() { Reset(); } -void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *const potential, const Real g, const DiskGalaxy &galaxy) +void PotentialParisGalactic::Get_Potential(const Real *const density, Real *const potential, const Real g, + const DiskGalaxy &galaxy) { - const Real scale = Real(4)*M_PI*g; + const Real scale = Real(4) * M_PI * g; assert(da_); Real *const da = da_; @@ -37,18 +41,18 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co const int nj = dn_[1]; const int nk = dn_[0]; - const int ngi = ni+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - const int ngj = nj+N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; + const int ngi = ni + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + const int ngj = nj + N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; -#ifdef GRAVITY_GPU + #ifdef GRAVITY_GPU const Real *const rho = density; - Real *const phi = potential; -#else - CHECK(cudaMemcpyAsync(da,density,densityBytes_,cudaMemcpyHostToDevice,0)); - CHECK(cudaMemcpyAsync(dc_,potential,potentialBytes_,cudaMemcpyHostToDevice,0)); + Real *const phi = potential; + #else + GPU_Error_Check(cudaMemcpyAsync(da, density, densityBytes_, cudaMemcpyHostToDevice, 0)); + GPU_Error_Check(cudaMemcpyAsync(dc_, potential, potentialBytes_, cudaMemcpyHostToDevice, 0)); const Real *const rho = da; - Real *const phi = dc_; -#endif + Real *const phi = dc_; + #endif const Real xMin = myLo_[2]; const Real yMin = myLo_[1]; @@ -58,59 +62,59 @@ void Potential_Paris_Galactic::Get_Potential(const Real *const density, Real *co const Real dy = dr_[1]; const Real dz = dr_[0]; - const Real md = galaxy.getM_d(); + const Real md = SIMULATED_FRACTION 
* galaxy.getM_d(); const Real rd = galaxy.getR_d(); const Real zd = galaxy.getZ_d(); - const Real rho0 = md*zd*zd/(4.0*M_PI); + const Real rho0 = md * zd * zd / (4.0 * M_PI); gpuFor( - nk,nj,ni, - GPU_LAMBDA(const int k, const int j, const int i) { - const int ia = i+ni*(j+nj*k); + nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { + const int ia = i + ni * (j + nj * k); - const Real x = xMin+i*dx; - const Real y = yMin+j*dy; - const Real z = zMin+k*dz; + const Real x = xMin + i * dx; + const Real y = yMin + j * dy; + const Real z = zMin + k * dz; - const Real r = sqrt(x*x+y*y); - const Real a = sqrt(z*z+zd*zd); - const Real b = rd+a; - const Real c = r*r+b*b; - const Real dRho = rho0*(rd*c+3.0*a*b*b)/(a*a*a*pow(c,2.5)); + const Real r = sqrt(x * x + y * y); + const Real a = sqrt(z * z + zd * zd); + const Real b = rd + a; + const Real c = r * r + b * b; + const Real dRho = rho0 * (rd * c + 3.0 * a * b * b) / (a * a * a * pow(c, 2.5)); - da[ia] = scale*(rho[ia]-dRho); - }); + da[ia] = scale * (rho[ia] - dRho); + }); - pp_->solve(minBytes_,da,db); + pp_->solve(minBytes_, da, db); - const Real phi0 = -g*md; + const Real phi0 = -g * md; gpuFor( - nk,nj,ni, - GPU_LAMBDA(const int k, const int j, const int i) { - const int ia = i+ni*(j+nj*k); - const int ib = i+N_GHOST_POTENTIAL+ngi*(j+N_GHOST_POTENTIAL+ngj*(k+N_GHOST_POTENTIAL)); - - const Real x = xMin+i*dx; - const Real y = yMin+j*dy; - const Real z = zMin+k*dz; - - const Real r = sqrt(x*x+y*y); - const Real a = sqrt(z*z+zd*zd); - const Real b = a+rd; - const Real c = sqrt(r*r+b*b); - const Real dPhi = phi0/c; - - phi[ib] = db[ia]+dPhi; - }); - -#ifndef GRAVITY_GPU - CHECK(cudaMemcpy(potential,dc_,potentialBytes_,cudaMemcpyDeviceToHost)); -#endif + nk, nj, ni, GPU_LAMBDA(const int k, const int j, const int i) { + const int ia = i + ni * (j + nj * k); + const int ib = i + N_GHOST_POTENTIAL + ngi * (j + N_GHOST_POTENTIAL + ngj * (k + N_GHOST_POTENTIAL)); + + const Real x = xMin + i * dx; + const Real y = 
yMin + j * dy; + const Real z = zMin + k * dz; + + const Real r = sqrt(x * x + y * y); + const Real a = sqrt(z * z + zd * zd); + const Real b = a + rd; + const Real c = sqrt(r * r + b * b); + const Real dPhi = phi0 / c; + + phi[ib] = db[ia] + dPhi; + }); + + #ifndef GRAVITY_GPU + GPU_Error_Check(cudaMemcpy(potential, dc_, potentialBytes_, cudaMemcpyDeviceToHost)); + #endif } -void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, const Real zMin, const int nx, const int ny, const int nz, const int nxReal, const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) +void PotentialParisGalactic::Initialize(const Real lx, const Real ly, const Real lz, const Real xMin, const Real yMin, + const Real zMin, const int nx, const int ny, const int nz, const int nxReal, + const int nyReal, const int nzReal, const Real dx, const Real dy, const Real dz) { - const long nl012 = long(nxReal)*long(nyReal)*long(nzReal); + const long nl012 = long(nxReal) * long(nyReal) * long(nzReal); assert(nl012 <= INT_MAX); dn_[0] = nzReal; @@ -125,53 +129,66 @@ void Potential_Paris_Galactic::Initialize(const Real lx, const Real ly, const Re lr_[1] = ly; lr_[2] = lx; - myLo_[0] = zMin+0.5*dr_[0]; - myLo_[1] = yMin+0.5*dr_[1]; - myLo_[2] = xMin+0.5*dr_[2]; - MPI_Allreduce(myLo_,lo_,3,MPI_DOUBLE,MPI_MIN,MPI_COMM_WORLD); - - const Real hi[3] = {lo_[0]+lr_[0]-dr_[0],lo_[1]+lr_[1]-dr_[1],lo_[2]+lr_[1]-dr_[2]}; - const int n[3] = {nz,ny,nx}; - const int m[3] = {n[0]/nzReal,n[1]/nyReal,n[2]/nxReal}; - const int id[3] = {int(round((myLo_[0]-lo_[0])/(dn_[0]*dr_[0]))),int(round((myLo_[1]-lo_[1])/(dn_[1]*dr_[1]))),int(round((myLo_[2]-lo_[2])/(dn_[2]*dr_[2])))}; - chprintf(" Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ %d %d %d ]\n",lo_[2],lo_[1],lo_[0],hi[2],hi[1],hi[0],dn_[2],dn_[1],dn_[0],m[2],m[1],m[0]); - - assert(dn_[0] == n[0]/m[0]); - assert(dn_[1] == n[1]/m[1]); - assert(dn_[2] == 
n[2]/m[2]); - - pp_ = new PoissonZero3DBlockedGPU(n,lo_,hi,m,id); + myLo_[0] = zMin + 0.5 * dr_[0]; + myLo_[1] = yMin + 0.5 * dr_[1]; + myLo_[2] = xMin + 0.5 * dr_[2]; + MPI_Allreduce(myLo_, lo_, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + + const Real hi[3] = {lo_[0] + lr_[0] - dr_[0], lo_[1] + lr_[1] - dr_[1], lo_[2] + lr_[1] - dr_[2]}; + const int n[3] = {nz, ny, nx}; + const int m[3] = {n[0] / nzReal, n[1] / nyReal, n[2] / nxReal}; + const int id[3] = {int(round((myLo_[0] - lo_[0]) / (dn_[0] * dr_[0]))), + int(round((myLo_[1] - lo_[1]) / (dn_[1] * dr_[1]))), + int(round((myLo_[2] - lo_[2]) / (dn_[2] * dr_[2])))}; + chprintf( + " Paris Galactic: [ %g %g %g ]-[ %g %g %g ] n_local[ %d %d %d ] tasks[ " + "%d %d %d ]\n", + lo_[2], lo_[1], lo_[0], hi[2], hi[1], hi[0], dn_[2], dn_[1], dn_[0], m[2], m[1], m[0]); + + assert(dn_[0] == n[0] / m[0]); + assert(dn_[1] == n[1] / m[1]); + assert(dn_[2] == n[2] / m[2]); + + pp_ = new PoissonZero3DBlockedGPU(n, lo_, hi, m, id); assert(pp_); - minBytes_ = pp_->bytes(); - densityBytes_ = long(sizeof(Real))*dn_[0]*dn_[1]*dn_[2]; + minBytes_ = pp_->bytes(); + densityBytes_ = long(sizeof(Real)) * dn_[0] * dn_[1] * dn_[2]; - CHECK(cudaMalloc(reinterpret_cast(&da_),std::max(minBytes_,densityBytes_))); - CHECK(cudaMalloc(reinterpret_cast(&db_),std::max(minBytes_,densityBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&da_), std::max(minBytes_, densityBytes_))); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&db_), std::max(minBytes_, densityBytes_))); -#ifndef GRAVITY_GPU - const long gg = N_GHOST_POTENTIAL+N_GHOST_POTENTIAL; - potentialBytes_ = long(sizeof(Real))*(dn_[0]+gg)*(dn_[1]+gg)*(dn_[2]+gg); - CHECK(cudaMalloc(reinterpret_cast(&dc_),potentialBytes_)); -#endif + #ifndef GRAVITY_GPU + const long gg = N_GHOST_POTENTIAL + N_GHOST_POTENTIAL; + potentialBytes_ = long(sizeof(Real)) * (dn_[0] + gg) * (dn_[1] + gg) * (dn_[2] + gg); + GPU_Error_Check(cudaMalloc(reinterpret_cast(&dc_), potentialBytes_)); + #endif } -void 
Potential_Paris_Galactic::Reset() +void PotentialParisGalactic::Reset() { -#ifndef GRAVITY_GPU - if (dc_) CHECK(cudaFree(dc_)); - dc_ = nullptr; + #ifndef GRAVITY_GPU + if (dc_) { + GPU_Error_Check(cudaFree(dc_)); + } + dc_ = nullptr; potentialBytes_ = 0; -#endif + #endif - if (db_) CHECK(cudaFree(db_)); + if (db_) { + GPU_Error_Check(cudaFree(db_)); + } db_ = nullptr; - if (da_) CHECK(cudaFree(da_)); + if (da_) { + GPU_Error_Check(cudaFree(da_)); + } da_ = nullptr; densityBytes_ = minBytes_ = 0; - if (pp_) delete pp_; + if (pp_) { + delete pp_; + } pp_ = nullptr; myLo_[2] = myLo_[1] = myLo_[0] = 0; diff --git a/src/gravity/potential_paris_galactic.h b/src/gravity/potential_paris_galactic.h index bb05fa310..95fb1fc1f 100644 --- a/src/gravity/potential_paris_galactic.h +++ b/src/gravity/potential_paris_galactic.h @@ -2,29 +2,32 @@ #ifdef PARIS_GALACTIC -#include "paris/PoissonZero3DBlockedGPU.hpp" -#include "../global/global.h" -#include "../model/disk_galaxy.h" + #include "../global/global.h" + #include "../model/disk_galaxy.h" + #include "paris/PoissonZero3DBlockedGPU.hpp" -class Potential_Paris_Galactic { - public: - Potential_Paris_Galactic(); - ~Potential_Paris_Galactic(); - void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy); - void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, int nxReal, int nyReal, int nzReal, Real dx, Real dy, Real dz); - void Reset(); - protected: - int dn_[3]; - Real dr_[3],lo_[3],lr_[3],myLo_[3]; - PoissonZero3DBlockedGPU *pp_; - long densityBytes_; - long minBytes_; - Real *da_; - Real *db_; -#ifndef GRAVITY_GPU - long potentialBytes_; - Real *dc_; -#endif +class PotentialParisGalactic +{ + public: + PotentialParisGalactic(); + ~PotentialParisGalactic(); + void Get_Potential(const Real *density, Real *potential, Real g, const DiskGalaxy &galaxy); + void Initialize(Real lx, Real ly, Real lz, Real xMin, Real yMin, Real zMin, int nx, int ny, int nz, 
int nxReal, + int nyReal, int nzReal, Real dx, Real dy, Real dz); + void Reset(); + + protected: + int dn_[3]; + Real dr_[3], lo_[3], lr_[3], myLo_[3]; + PoissonZero3DBlockedGPU *pp_; + long densityBytes_; + long minBytes_; + Real *da_; + Real *db_; + #ifndef GRAVITY_GPU + long potentialBytes_; + Real *dc_; + #endif }; #endif diff --git a/src/gravity/static_grav.h b/src/gravity/static_grav.h index 3ddbb86be..e671555bf 100644 --- a/src/gravity/static_grav.h +++ b/src/gravity/static_grav.h @@ -2,161 +2,209 @@ * \brief Definitions of functions to calculate gravitational acceleration in 1, 2, and 3D. Called in Update_Conserved_Variables functions in hydro_cuda.cu. */ -#ifdef CUDA #pragma once +#include // provides sqrt log cos sin atan etc. #include -#include // provides sqrt log cos sin atan etc. -#include "../global/global.h" // provides GN etc. + +#include "../global/global.h" // provides GN etc. // Work around lack of pow(Real,int) in Hip Clang for Rocm 3.5 -static inline __device__ Real pow2(const Real x) { return x*x; } +static inline __device__ Real pow2(const Real x) { return x * x; } -inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, Real dx, Real xbound, Real *gx) +inline __device__ void calc_g_1D(int xid, int x_off, int n_ghost, int custom_grav, Real dx, Real xbound, Real *gx) { Real x_pos, r_disk, r_halo; - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - - // for disk components, calculate polar r - //r_disk = 0.220970869121; - //r_disk = 6.85009694274; - r_disk = 13.9211647546; - //r_disk = 20.9922325665; - // for halo, calculate spherical r - r_halo = sqrt(x_pos*x_pos + r_disk*r_disk); - - // set properties of halo and disk (these must match initial conditions) - Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun - M_h = M_vir - M_d; // halo mass in M_sun - R_vir = 261; // viral radius in kpc - c_vir = 20.0; // halo 
concentration - R_h = R_vir / c_vir; // halo scale length in kpc - R_d = 3.5; // disk scale length in kpc - z_d = 3.5/5.0; // disk scale height in kpc - phi_0_h = GN * M_h / (log(1.0+c_vir) - c_vir / (1.0+c_vir)); - x = r_halo / R_h; - - // calculate acceleration due to NFW halo & Miyamoto-Nagai disk - a_halo = - phi_0_h * (log(1+x) - x/(1+x)) / (r_halo*r_halo); - a_disk_z = - GN * M_d * x_pos * (R_d + sqrt(x_pos*x_pos + z_d*z_d)) / ( pow(r_disk*r_disk + pow2(R_d + sqrt(x_pos*x_pos + z_d*z_d)), 1.5) * sqrt(x_pos*x_pos + z_d*z_d) ); - - // total acceleration is the sum of the halo + disk components - *gx = (x_pos/r_halo)*a_halo + a_disk_z; - + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + // set gravity field according to parameter file input + switch (custom_grav) { + case 1: + // 1D NFW halo & Miyamoto-Nagai disk + // for disk components, calculate polar r + // r_disk = 0.220970869121; + // r_disk = 6.85009694274; + r_disk = 13.9211647546; + // r_disk = 20.9922325665; + // for halo, calculate spherical r + r_halo = sqrt(x_pos * x_pos + r_disk * r_disk); + + // set properties of halo and disk (these must match initial conditions) + Real a_disk_z, a_halo, M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; + M_vir = 1.0e12; // viral mass of MW in M_sun + M_d = 6.5e10; // mass of disk in M_sun + M_h = M_vir - M_d; // halo mass in M_sun + R_vir = 261; // viral radius in kpc + c_vir = 20.0; // halo concentration + R_h = R_vir / c_vir; // halo scale length in kpc + R_d = 3.5; // disk scale length in kpc + z_d = 3.5 / 5.0; // disk scale height in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; + + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_disk_z = + -GN * M_d * x_pos * (R_d + sqrt(x_pos * x_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(x_pos * x_pos + z_d * z_d)), 1.5) * sqrt(x_pos * x_pos + z_d * z_d)); 
+ + // total acceleration is the sum of the halo + disk components + *gx = (x_pos / r_halo) * a_halo + a_disk_z; + break; + default: + *gx = 0; + } return; - } - -inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real *gx, Real *gy) +inline __device__ void calc_g_2D(int xid, int yid, int x_off, int y_off, int n_ghost, int custom_grav, Real dx, Real dy, + Real xbound, Real ybound, Real *gx, Real *gy) { Real x_pos, y_pos, r, phi; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - - // for Gresho, also need r & phi - r = sqrt(x_pos*x_pos + y_pos*y_pos); + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + // for Gresho and disks, also need r & phi + r = sqrt(x_pos * x_pos + y_pos * y_pos); phi = atan2(y_pos, x_pos); - -/* - // set acceleration to balance v_phi in Gresho problem - if (r < 0.2) { - *gx = -cos(phi)*25.0*r; - *gy = -sin(phi)*25.0*r; - } - else if (r >= 0.2 && r < 0.4) { - *gx = -cos(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - *gy = -sin(phi)*(4.0 - 20.0*r + 25.0*r*r)/r; - } - else { - *gx = 0.0; - *gy = 0.0; + switch (custom_grav) { + case 1: + // Gresho vortex + // set acceleration to balance v_phi in Gresho problem + if (r < 0.2) { + *gx = -cos(phi) * 25.0 * r; + *gy = -sin(phi) * 25.0 * r; + } else if (r >= 0.2 && r < 0.4) { + *gx = -cos(phi) * (4.0 - 20.0 * r + 25.0 * r * r) / r; + *gy = -sin(phi) * (4.0 - 20.0 * r + 25.0 * r * r) / r; + } else { + *gx = 0.0; + *gy = 0.0; + } + break; + case 2: + // Rayleigh-Taylor instability + *gx = 0; + *gy = -1; + break; + case 3: + // 2D disk in keplerian rotation + Real M; + M = 1 * MSUN_CGS; + *gx = -cos(phi) * GN * M / (r * r); + *gy = 
-sin(phi) * GN * M / (r * r); + break; + case 4: + // set gravitational acceleration for Kuzmin disk + NFW halo + Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; + M_vir = 1.0e12; // viral mass of MW in M_sun + M_d = 6.5e10; // mass of disk in M_sun (assume all gas) + M_h = M_vir - M_d; // halo mass in M_sun + R_vir = 261; // viral radius in kpc + c_vir = 20; // halo concentration + R_s = R_vir / c_vir; // halo scale length in kpc + R_d = 3.5; // disk scale length in kpc + + // calculate acceleration + x = r / R_s; + a_d = GN * M_d * r * pow(r * r + R_d * R_d, -1.5); + a_h = GN * M_h * (log(1 + x) - x / (1 + x)) / ((log(1 + c_vir) - c_vir / (1 + c_vir)) * r * r); + a = a_d + a_h; + + *gx = -cos(phi) * a; + *gy = -sin(phi) * a; + break; + default: + *gx = 0; + *gy = 0; } -*/ -/* - // set gravitational acceleration for Keplarian potential - Real M; - M = 1*Msun; - *gx = -cos(phi)*GN*M/(r*r); - *gy = -sin(phi)*GN*M/(r*r); -*/ - // set gravitational acceleration for Kuzmin disk + NFW halo - Real a_d, a_h, a, M_vir, M_d, R_vir, R_d, R_s, M_h, c_vir, x; - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun (assume all gas) - M_h = M_vir - M_d; // halo mass in M_sun - R_vir = 261; // viral radius in kpc - c_vir = 20; // halo concentration - R_s = R_vir / c_vir; // halo scale length in kpc - R_d = 3.5; // disk scale length in kpc - - // calculate acceleration - x = r / R_s; - a_d = GN * M_d * r * pow(r*r + R_d*R_d, -1.5); - a_h = GN * M_h * (log(1+x)- x / (1+x)) / ((log(1+c_vir) - c_vir / (1+c_vir)) * r*r); - a = a_d + a_h; - - *gx = -cos(phi)*a; - *gy = -sin(phi)*a; return; } - -inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz) +inline __device__ void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, + int custom_grav, Real dx, Real dy, Real 
dz, Real xbound, Real ybound, Real zbound, + Real *gx, Real *gy, Real *gz) { Real x_pos, y_pos, z_pos, r_disk, r_halo; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for disk components, calculate polar r - r_disk = sqrt(x_pos*x_pos + y_pos*y_pos); + r_disk = sqrt(x_pos * x_pos + y_pos * y_pos); // for halo, calculate spherical r - r_halo = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); - - // set properties of halo and disk (these must match initial conditions) + r_halo = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); Real a_disk_r, a_disk_z, a_halo, a_halo_r, a_halo_z; Real M_vir, M_d, R_vir, R_d, z_d, R_h, M_h, c_vir, phi_0_h, x; - // MW model - M_vir = 1.0e12; // viral mass of in M_sun - M_d = 6.5e10; // viral mass of in M_sun - R_d = 3.5; // disk scale length in kpc - z_d = 3.5/5.0; // disk scale height in kpc - R_vir = 261.; // virial radius in kpc - c_vir = 20.0; // halo concentration - // M82 model - //M_vir = 5.0e10; // viral mass of in M_sun - //M_d = 1.0e10; // mass of disk in M_sun - //R_d = 0.8; // disk scale length in kpc - //z_d = 0.15; // disk scale height in kpc - //R_vir = R_d/0.015; // viral radius in kpc - //c_vir = 10.0; // halo concentration - - M_h = M_vir - M_d; // halo mass in M_sun - R_h = R_vir / c_vir; // halo scale length in kpc - phi_0_h = GN * M_h / (log(1.0+c_vir) - c_vir / (1.0+c_vir)); - x = r_halo / R_h; - - // calculate acceleration due to NFW halo & Miyamoto-Nagai disk - a_halo = - phi_0_h * (log(1+x) - x/(1+x)) / (r_halo*r_halo); - a_halo_r = 
a_halo*(r_disk/r_halo); - a_halo_z = a_halo*(z_pos/r_halo); - a_disk_r = - GN * M_d * r_disk * pow(r_disk*r_disk+ pow2(R_d + sqrt(z_pos*z_pos + z_d*z_d)), -1.5); - a_disk_z = - GN * M_d * z_pos * (R_d + sqrt(z_pos*z_pos + z_d*z_d)) / ( pow(r_disk*r_disk + pow2(R_d + sqrt(z_pos*z_pos + z_d*z_d)), 1.5) * sqrt(z_pos*z_pos + z_d*z_d) ); - - // total acceleration is the sum of the halo + disk components - *gx = (x_pos/r_disk)*(a_disk_r+a_halo_r); - *gy = (y_pos/r_disk)*(a_disk_r+a_halo_r); - *gz = a_disk_z+a_halo_z; - + switch (custom_grav) { + case 1: + // Milky way disk model + // set properties of halo and disk (these must match initial conditions) + + M_vir = 1.0e12; // viral mass of in M_sun + M_d = 6.5e10; // viral mass of in M_sun + R_d = 3.5; // disk scale length in kpc + z_d = 3.5 / 5.0; // disk scale height in kpc + R_vir = 261.; // virial radius in kpc + c_vir = 20.0; // halo concentration + + M_h = M_vir - M_d; // halo mass in M_sun + R_h = R_vir / c_vir; // halo scale length in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; + + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_halo_r = a_halo * (r_disk / r_halo); + a_halo_z = a_halo * (z_pos / r_halo); + a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); + a_disk_z = + -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d)); + + // total acceleration is the sum of the halo + disk components + *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); + *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); + *gz = a_disk_z + a_halo_z; + break; + case 2: + // M82 model + // set properties of halo and disk (these must match initial conditions) + + M_vir = 5.0e10; // viral mass of in M_sun + M_d = 1.0e10; // mass of disk in M_sun 
+ R_d = 0.8; // disk scale length in kpc + z_d = 0.15; // disk scale height in kpc + R_vir = R_d / 0.015; // viral radius in kpc + c_vir = 10.0; // halo concentration + + M_h = M_vir - M_d; // halo mass in M_sun + R_h = R_vir / c_vir; // halo scale length in kpc + phi_0_h = GN * M_h / (log(1.0 + c_vir) - c_vir / (1.0 + c_vir)); + x = r_halo / R_h; + + // calculate acceleration due to NFW halo & Miyamoto-Nagai disk + a_halo = -phi_0_h * (log(1 + x) - x / (1 + x)) / (r_halo * r_halo); + a_halo_r = a_halo * (r_disk / r_halo); + a_halo_z = a_halo * (z_pos / r_halo); + a_disk_r = -GN * M_d * r_disk * pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), -1.5); + a_disk_z = + -GN * M_d * z_pos * (R_d + sqrt(z_pos * z_pos + z_d * z_d)) / + (pow(r_disk * r_disk + pow2(R_d + sqrt(z_pos * z_pos + z_d * z_d)), 1.5) * sqrt(z_pos * z_pos + z_d * z_d)); + + // total acceleration is the sum of the halo + disk components + *gx = (x_pos / r_disk) * (a_disk_r + a_halo_r); + *gy = (y_pos / r_disk) * (a_disk_r + a_halo_r); + *gz = a_disk_z + a_halo_z; + break; + default: + *gx = 0; + *gy = 0; + *gz = 0; + } return; } - -#endif //CUDA - diff --git a/src/grid/boundary_conditions.cpp b/src/grid/boundary_conditions.cpp index 341360eba..d9201fe8b 100644 --- a/src/grid/boundary_conditions.cpp +++ b/src/grid/boundary_conditions.cpp @@ -2,122 +2,124 @@ * \brief Definitions of the boundary conditions for various tests. Functions are members of the Grid3D class. */ -#include -#include #include +#include +#include #include + +#include "../grid/cuda_boundaries.h" // provides SetGhostCells #include "../grid/grid3D.h" #include "../io/io.h" -#include "../utils/error_handling.h" #include "../mpi/mpi_routines.h" +#include "../utils/error_handling.h" -#include "../grid/cuda_boundaries.h" // provides SetGhostCells - - -/*! \fn void Set_Boundary_Conditions_Grid(parameters P) - * \brief Set the boundary conditions for all components based on info in the parameters structure. 
*/ -void Grid3D::Set_Boundary_Conditions_Grid( parameters P){ - - #ifndef ONLY_PARTICLES +/*! \fn void Set_Boundary_Conditions_Grid(Parameters P ) + * \brief Set the boundary conditions for all components based on info in the + * parameters structure. */ +void Grid3D::Set_Boundary_Conditions_Grid(Parameters P) +{ +#ifndef ONLY_PARTICLES // Dont transfer Hydro boundaries when only doing particles // Transfer Hydro Conserved boundaries #ifdef CPU_TIME Timer.Boundaries.Start(); - #endif //CPU_TIME + #endif // CPU_TIME H.TRANSFER_HYDRO_BOUNDARIES = true; Set_Boundary_Conditions(P); H.TRANSFER_HYDRO_BOUNDARIES = false; #ifdef CPU_TIME Timer.Boundaries.End(); - #endif //CPU_TIME - #endif //ONLY_PARTICLES + #endif // CPU_TIME +#endif // ONLY_PARTICLES - // If the Gravity coupling is on the CPU, the potential is not in the Conserved arrays, - // and its boundaries need to be transferred separately - #ifdef GRAVITY +// If the Gravity coupling is on the CPU, the potential is not in the Conserved +// arrays, and its boundaries need to be transferred separately +#ifdef GRAVITY #ifdef CPU_TIME Timer.Pot_Boundaries.Start(); - #endif + #endif // CPU_TIME Grav.TRANSFER_POTENTIAL_BOUNDARIES = true; Set_Boundary_Conditions(P); Grav.TRANSFER_POTENTIAL_BOUNDARIES = false; #ifdef CPU_TIME Timer.Pot_Boundaries.End(); - #endif - #endif + #endif // CPU_TIME +#endif // GRAVITY } -/*! \fn void Set_Boundary_Conditions(parameters P) - * \brief Set the boundary conditions based on info in the parameters structure. */ -void Grid3D::Set_Boundary_Conditions(parameters P) { - - //Check Only one boundary type id being transferred +/*! \fn void Set_Boundary_Conditions(Parameters P ) + * \brief Set the boundary conditions based on info in the parameters + * structure. 
*/ +void Grid3D::Set_Boundary_Conditions(Parameters P) +{ + // Check Only one boundary type id being transferred int n_bounds = 0; - n_bounds += (int) H.TRANSFER_HYDRO_BOUNDARIES; - #ifdef GRAVITY - n_bounds += (int) Grav.TRANSFER_POTENTIAL_BOUNDARIES; + n_bounds += (int)H.TRANSFER_HYDRO_BOUNDARIES; +#ifdef GRAVITY + n_bounds += (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES; + #ifdef SOR + n_bounds += (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES; + #endif // SOR +#endif // GRAVITY +#ifdef PARTICLES + n_bounds += (int)Particles.TRANSFER_PARTICLES_BOUNDARIES; + n_bounds += (int)Particles.TRANSFER_DENSITY_BOUNDARIES; +#endif // PARTICLES + + if (n_bounds > 1) { + printf( + "ERROR: More than one boundary type for transfer. N boundary types: " + "%d\n", + n_bounds); + printf(" Boundary Hydro: %d\n", (int)H.TRANSFER_HYDRO_BOUNDARIES); +#ifdef GRAVITY + printf(" Boundary Potential: %d\n", (int)Grav.TRANSFER_POTENTIAL_BOUNDARIES); #ifdef SOR - n_bounds += (int) Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES; - #endif //SOR - #endif //GRAVITY - #ifdef PARTICLES - n_bounds += (int) Particles.TRANSFER_PARTICLES_BOUNDARIES; - n_bounds += (int) Particles.TRANSFER_DENSITY_BOUNDARIES; - #endif //PARTICLES - - if ( n_bounds > 1 ){ - printf("ERROR: More than one boundary type for transfer. 
N boundary types: %d\n", n_bounds ); - printf(" Boundary Hydro: %d\n", (int) H.TRANSFER_HYDRO_BOUNDARIES ); - #ifdef GRAVITY - printf(" Boundary Potential: %d\n", (int) Grav.TRANSFER_POTENTIAL_BOUNDARIES ); - #ifdef SOR - printf(" Boundary Poisson: %d\n", (int) Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ); - #endif //SOR - #endif //GRAVITY - #ifdef PARTICLES - printf(" Boundary Particles: %d\n", (int) Particles.TRANSFER_PARTICLES_BOUNDARIES ); - printf(" Boundary Particles Density: %d\n", (int) Particles.TRANSFER_DENSITY_BOUNDARIES ); - #endif //PARTICLES + printf(" Boundary Poisson: %d\n", (int)Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES); + #endif // SOR +#endif // GRAVITY +#ifdef PARTICLES + printf(" Boundary Particles: %d\n", (int)Particles.TRANSFER_PARTICLES_BOUNDARIES); + printf(" Boundary Particles Density: %d\n", (int)Particles.TRANSFER_DENSITY_BOUNDARIES); +#endif // PARTICLES exit(-1); } // If no boundaries are set to be transferred then exit; - if ( n_bounds == 0 ){ - printf( " Warning: No boundary type for transfer \n"); - return; + if (n_bounds == 0) { + printf(" Warning: No boundary type for transfer \n"); + return; } - #ifndef MPI_CHOLLA - int flags[6] = {0,0,0,0,0,0}; + int flags[6] = {0, 0, 0, 0, 0, 0}; // Check for custom boundary conditions and set boundary flags - if(Check_Custom_Boundary(&flags[0], P)) - { + if (Check_Custom_Boundary(&flags[0], P)) { Custom_Boundary(P.custom_bcnd); } // set regular boundaries - if(H.nx>1) { + if (H.nx > 1) { Set_Boundaries(0, flags); Set_Boundaries(1, flags); } - if(H.ny>1) { + if (H.ny > 1) { Set_Boundaries(2, flags); Set_Boundaries(3, flags); } - if(H.nz>1) { + if (H.nz > 1) { Set_Boundaries(4, flags); Set_Boundaries(5, flags); } #ifdef GRAVITY - Grav.Set_Boundary_Flags( flags ); - #endif //Gravity + Grav.Set_Boundary_Flags(flags); + #endif // Gravity -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /*Set boundaries, including MPI exchanges*/ @@ -126,40 +128,29 @@ void 
Grid3D::Set_Boundary_Conditions(parameters P) { #endif /*MPI_CHOLLA*/ } - -/*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) +/*! \fn int Check_Custom_Boundary(int *flags, struct Parameters P) * \brief Check for custom boundary conditions and set boundary flags. */ -int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) +int Grid3D::Check_Custom_Boundary(int *flags, struct Parameters P) { - /*check if any boundary is a custom boundary*/ /*if yes, then return 1*/ /*if no, then return 0*/ /*additionally, set a flag for each boundary*/ - if(H.nx>1) - { - *(flags+0) = P.xl_bcnd; - *(flags+1) = P.xu_bcnd; + if (H.nx > 1) { + *(flags + 0) = P.xl_bcnd; + *(flags + 1) = P.xu_bcnd; } - if(H.ny>1) - { - *(flags+2) = P.yl_bcnd; - *(flags+3) = P.yu_bcnd; + if (H.ny > 1) { + *(flags + 2) = P.yl_bcnd; + *(flags + 3) = P.yu_bcnd; } - if(H.nz>1) - { - *(flags+4) = P.zl_bcnd; - *(flags+5) = P.zu_bcnd; + if (H.nz > 1) { + *(flags + 4) = P.zl_bcnd; + *(flags + 5) = P.zu_bcnd; } - for (int i=0; i<6; i++) - { - if (!( (flags[i]>=0)&&(flags[i]<=5) ) ) - { - chprintf("Invalid boundary conditions. Must select between 1 (periodic), 2 (reflective), 3 (transmissive), 4 (custom), 5 (mpi).\n"); - chexit(-1); - } + for (int i = 0; i < 6; i++) { if (flags[i] == 4) { /*custom boundaries*/ return 1; @@ -169,154 +160,258 @@ int Grid3D::Check_Custom_Boundary(int *flags, struct parameters P) return 0; } - - /*! \fn void Set_Boundaries(int dir, int flags[]) * \brief Apply boundary conditions to the grid. 
*/ void Grid3D::Set_Boundaries(int dir, int flags[]) { int i, j, k; - int imin[3] = {0,0,0}; - int imax[3] = {H.nx,H.ny,H.nz}; - Real a[3] = {1,1,1}; //sign of momenta - int idx; //index of a real cell - int gidx; //index of a ghost cell + int imin[3] = {0, 0, 0}; + int imax[3] = {H.nx, H.ny, H.nz}; + Real a[3] = {1, 1, 1}; // sign of momenta + int idx; // index of a real cell + int gidx; // index of a ghost cell int nPB, nBoundaries; int *iaBoundary, *iaCell; /*if the cell face is an custom boundary, exit */ - if(flags[dir]==4) + if (flags[dir] == 4) { return; + } -#ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA /*if the cell face is an mpi boundary, exit */ - if(flags[dir]==5) + if (flags[dir] == 5) { return; + } #endif /*MPI_CHOLLA*/ - - - #ifdef GRAVITY - - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - if ( flags[dir] == 1 ){ - // Set Periodic Boundaries for the ghost cells. - #ifdef GRAVITY_GPU - if ( dir == 0 ) Set_Potential_Boundaries_Periodic_GPU( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Periodic_GPU( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Periodic_GPU( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Periodic_GPU( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Periodic_GPU( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Periodic_GPU( 2, 1, flags ); - #else - if ( dir == 0 ) Set_Potential_Boundaries_Periodic( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Periodic( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Periodic( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Periodic( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Periodic( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Periodic( 2, 1, flags ); - #endif +#ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + if (flags[dir] == 1) { + // Set Periodic Boundaries for the ghost cells. 
+ #ifdef GRAVITY_GPU + if (dir == 0) { + Set_Potential_Boundaries_Periodic_GPU(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Periodic_GPU(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Periodic_GPU(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Periodic_GPU(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Periodic_GPU(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Periodic_GPU(2, 1, flags); + } + #else + if (dir == 0) { + Set_Potential_Boundaries_Periodic(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Periodic(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Periodic(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Periodic(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Periodic(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Periodic(2, 1, flags); + } + #endif } - if ( flags[dir] == 3 ){ - - #ifdef GRAVITY_GPU - if ( dir == 0 ) Set_Potential_Boundaries_Isolated_GPU( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Isolated_GPU( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Isolated_GPU( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Isolated_GPU( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Isolated_GPU( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Isolated_GPU( 2, 1, flags ); - #else - if ( dir == 0 ) Set_Potential_Boundaries_Isolated( 0, 0, flags ); - if ( dir == 1 ) Set_Potential_Boundaries_Isolated( 0, 1, flags ); - if ( dir == 2 ) Set_Potential_Boundaries_Isolated( 1, 0, flags ); - if ( dir == 3 ) Set_Potential_Boundaries_Isolated( 1, 1, flags ); - if ( dir == 4 ) Set_Potential_Boundaries_Isolated( 2, 0, flags ); - if ( dir == 5 ) Set_Potential_Boundaries_Isolated( 2, 1, flags ); - #endif//GRAVITY_GPU + if (flags[dir] == 3) { + #ifdef GRAVITY_GPU + if (dir == 0) { + Set_Potential_Boundaries_Isolated_GPU(0, 0, flags); + } + if (dir == 1) { + 
Set_Potential_Boundaries_Isolated_GPU(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Isolated_GPU(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Isolated_GPU(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Isolated_GPU(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Isolated_GPU(2, 1, flags); + } + #else + if (dir == 0) { + Set_Potential_Boundaries_Isolated(0, 0, flags); + } + if (dir == 1) { + Set_Potential_Boundaries_Isolated(0, 1, flags); + } + if (dir == 2) { + Set_Potential_Boundaries_Isolated(1, 0, flags); + } + if (dir == 3) { + Set_Potential_Boundaries_Isolated(1, 1, flags); + } + if (dir == 4) { + Set_Potential_Boundaries_Isolated(2, 0, flags); + } + if (dir == 5) { + Set_Potential_Boundaries_Isolated(2, 1, flags); + } + #endif // GRAVITY_GPU } return; } #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ){ - if ( flags[dir] ==1 ){ - if ( dir == 0 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 0, 0 ); - if ( dir == 1 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 0, 1 ); - if ( dir == 2 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 1, 0 ); - if ( dir == 3 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 1, 1 ); - if ( dir == 4 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 2, 0 ); - if ( dir == 5 ) Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic( 2, 1 ); + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) { + if (flags[dir] == 1) { + if (dir == 0) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 0); + } + if (dir == 1) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(0, 1); + } + if (dir == 2) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 0); + } + if (dir == 3) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(1, 1); + } + if (dir == 4) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 0); + } + if (dir == 5) { + Grav.Poisson_solver.Copy_Poisson_Boundary_Periodic(2, 1); + } } return; } - #endif //SOR 
- #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES ){ - if ( flags[dir] ==1 ){ - // Set Periodic Boundaries for the particles density. - #ifdef PARTICLES_GPU - if ( dir == 0 ) Set_Particles_Density_Boundaries_Periodic_GPU( 0, 0 ); - if ( dir == 1 ) Set_Particles_Density_Boundaries_Periodic_GPU( 0, 1 ); - if ( dir == 2 ) Set_Particles_Density_Boundaries_Periodic_GPU( 1, 0 ); - if ( dir == 3 ) Set_Particles_Density_Boundaries_Periodic_GPU( 1, 1 ); - if ( dir == 4 ) Set_Particles_Density_Boundaries_Periodic_GPU( 2, 0 ); - if ( dir == 5 ) Set_Particles_Density_Boundaries_Periodic_GPU( 2, 1 ); - #endif - #ifdef PARTICLES_CPU - if ( dir == 0 ) Set_Particles_Density_Boundaries_Periodic( 0, 0 ); - if ( dir == 1 ) Set_Particles_Density_Boundaries_Periodic( 0, 1 ); - if ( dir == 2 ) Set_Particles_Density_Boundaries_Periodic( 1, 0 ); - if ( dir == 3 ) Set_Particles_Density_Boundaries_Periodic( 1, 1 ); - if ( dir == 4 ) Set_Particles_Density_Boundaries_Periodic( 2, 0 ); - if ( dir == 5 ) Set_Particles_Density_Boundaries_Periodic( 2, 1 ); - #endif + #endif // SOR +#endif // GRAVITY + +#ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + if (flags[dir] == 1) { + // Set Periodic Boundaries for the particles density. 
+ #ifdef PARTICLES_GPU + if (dir == 0) { + Set_Particles_Density_Boundaries_Periodic_GPU(0, 0); + } + if (dir == 1) { + Set_Particles_Density_Boundaries_Periodic_GPU(0, 1); + } + if (dir == 2) { + Set_Particles_Density_Boundaries_Periodic_GPU(1, 0); + } + if (dir == 3) { + Set_Particles_Density_Boundaries_Periodic_GPU(1, 1); + } + if (dir == 4) { + Set_Particles_Density_Boundaries_Periodic_GPU(2, 0); + } + if (dir == 5) { + Set_Particles_Density_Boundaries_Periodic_GPU(2, 1); + } + #endif + #ifdef PARTICLES_CPU + if (dir == 0) { + Set_Particles_Density_Boundaries_Periodic(0, 0); + } + if (dir == 1) { + Set_Particles_Density_Boundaries_Periodic(0, 1); + } + if (dir == 2) { + Set_Particles_Density_Boundaries_Periodic(1, 0); + } + if (dir == 3) { + Set_Particles_Density_Boundaries_Periodic(1, 1); + } + if (dir == 4) { + Set_Particles_Density_Boundaries_Periodic(2, 0); + } + if (dir == 5) { + Set_Particles_Density_Boundaries_Periodic(2, 1); + } + #endif } return; } - #endif //PARTICLES - - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - if ( flags[dir] ==1 ){ - #ifdef PARTICLES_CPU - if ( dir == 0 ) Set_Particles_Boundary( 0, 0 ); - if ( dir == 1 ) Set_Particles_Boundary( 0, 1 ); - if ( dir == 2 ) Set_Particles_Boundary( 1, 0 ); - if ( dir == 3 ) Set_Particles_Boundary( 1, 1 ); - if ( dir == 4 ) Set_Particles_Boundary( 2, 0 ); - if ( dir == 5 ) Set_Particles_Boundary( 2, 1 ); - #endif//PARTICLES_CPU - - #ifdef PARTICLES_GPU - if ( dir == 0 ) Set_Particles_Boundary_GPU( 0, 0 ); - if ( dir == 1 ) Set_Particles_Boundary_GPU( 0, 1 ); - if ( dir == 2 ) Set_Particles_Boundary_GPU( 1, 0 ); - if ( dir == 3 ) Set_Particles_Boundary_GPU( 1, 1 ); - if ( dir == 4 ) Set_Particles_Boundary_GPU( 2, 0 ); - if ( dir == 5 ) Set_Particles_Boundary_GPU( 2, 1 ); - #endif//PARTICLES_GPU - - - } else if (flags[dir] == 3) { - #ifdef PARTICLES_CPU - Set_Particles_Open_Boundary(dir/2, dir%2); - #endif //PARTICLES_CPU +#endif // PARTICLES + +#ifdef PARTICLES + if 
(Particles.TRANSFER_PARTICLES_BOUNDARIES) { + if (flags[dir] == 1) { + #ifdef PARTICLES_CPU + if (dir == 0) { + Set_Particles_Boundary(0, 0); + } + if (dir == 1) { + Set_Particles_Boundary(0, 1); + } + if (dir == 2) { + Set_Particles_Boundary(1, 0); + } + if (dir == 3) { + Set_Particles_Boundary(1, 1); + } + if (dir == 4) { + Set_Particles_Boundary(2, 0); + } + if (dir == 5) { + Set_Particles_Boundary(2, 1); + } + #endif // PARTICLES_CPU + + #ifdef PARTICLES_GPU + if (dir == 0) { + Set_Particles_Boundary_GPU(0, 0); + } + if (dir == 1) { + Set_Particles_Boundary_GPU(0, 1); + } + if (dir == 2) { + Set_Particles_Boundary_GPU(1, 0); + } + if (dir == 3) { + Set_Particles_Boundary_GPU(1, 1); + } + if (dir == 4) { + Set_Particles_Boundary_GPU(2, 0); + } + if (dir == 5) { + Set_Particles_Boundary_GPU(2, 1); + } + #endif // PARTICLES_GPU + + } else if (flags[dir] == 3) { + #ifdef PARTICLES_CPU + Set_Particles_Open_Boundary_CPU(dir / 2, dir % 2); + #endif + #ifdef PARTICLES_GPU + Particles.Set_Particles_Open_Boundary_GPU(dir / 2, dir % 2); + #endif } return; } - #endif//PARTICLES +#endif // PARTICLES - //get the extents of the ghost region we are initializing + // get the extents of the ghost region we are initializing Set_Boundary_Extents(dir, &imin[0], &imax[0]); // from grid/cuda_boundaries.cu - SetGhostCells(C.device, - H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags, - imax[0]-imin[0], imax[1]-imin[1], imax[2]-imin[2], - imin[0], imin[1], imin[2], dir); + SetGhostCells(C.device, H.nx, H.ny, H.nz, H.n_fields, H.n_cells, H.n_ghost, flags, imax[0] - imin[0], + imax[1] - imin[1], imax[2] - imin[2], imin[0], imin[1], imin[2], dir); } /*! 
\fn Set_Boundary_Extents(int dir, int *imin, int *imax) @@ -330,108 +425,118 @@ void Grid3D::Set_Boundary_Extents(int dir, int *imin, int *imax) ku = H.nz; if (H.ny > 1) { jl = H.n_ghost; - ju = H.ny-H.n_ghost; + ju = H.ny - H.n_ghost; } if (H.nz > 1) { kl = H.n_ghost; - ku = H.nz-H.n_ghost; + ku = H.nz - H.n_ghost; } il = 0; iu = H.n_ghost; /*lower x face*/ - if(dir==0) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 0) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } - il = H.nx-H.n_ghost; + il = H.nx - H.n_ghost; iu = H.nx; /*upper x face*/ - if(dir==1) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 1) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } il = 0; iu = H.nx; jl = 0; ju = H.n_ghost; /*lower y face*/ - if(dir==2) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 2) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } - jl = H.ny-H.n_ghost; + jl = H.ny - H.n_ghost; ju = H.ny; /*upper y face*/ - if(dir==3) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 3) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } jl = 0; ju = H.ny; kl = 0; ku = H.n_ghost; /*lower z face*/ - if(dir==4) - { - *(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 4) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } - kl = H.nz-H.n_ghost; + kl = H.nz - H.n_ghost; ku = H.nz; /*upper z face*/ - if(dir==5) - { - 
*(imin) = il; - *(imax) = iu; - *(imin+1) = jl; - *(imax+1) = ju; - *(imin+2) = kl; - *(imax+2) = ku; + if (dir == 5) { + *(imin) = il; + *(imax) = iu; + *(imin + 1) = jl; + *(imax + 1) = ju; + *(imin + 2) = kl; + *(imax + 2) = ku; } } - - /*! \fn void Custom_Boundary(char bcnd[MAXLEN]) * \brief Select appropriate custom boundary function. */ void Grid3D::Custom_Boundary(char bcnd[MAXLEN]) { - if (strcmp(bcnd, "noh")==0) { + if (strcmp(bcnd, "noh") == 0) { // from grid/cuda_boundaries.cu Noh_Boundary(); - } - else { + } else if (strcmp(bcnd, "wind") == 0) { + // from grid/cuda_boundaries.cu + Wind_Boundary(); + } else { printf("ABORT: %s -> Unknown custom boundary condition.\n", bcnd); exit(0); } } +/*! \fn void Wind_Boundary() + * \brief Apply wind boundary */ +void Grid3D::Wind_Boundary() +{ + int x_off, y_off, z_off; + // set x, y, & z offsets of local CPU volume to pass to GPU + // so global position on the grid is known + x_off = y_off = z_off = 0; +#ifdef MPI_CHOLLA + x_off = nx_local_start; + y_off = ny_local_start; + z_off = nz_local_start; +#endif + Wind_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound, + H.ybound, H.zbound, gama, H.t); +} /*! 
\fn void Noh_Boundary() * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, @@ -445,106 +550,91 @@ void Grid3D::Noh_Boundary() // set x, y, & z offsets of local CPU volume to pass to GPU // so global position on the grid is known x_off = y_off = z_off = 0; - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA x_off = nx_local_start; y_off = ny_local_start; z_off = nz_local_start; - #endif - - Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, - x_off, y_off, z_off, H.dx, H.dy, H.dz, - H.xbound, H.ybound, H.zbound, gama, H.t); - -/* - int i, j, k, id; - Real x_pos, y_pos, z_pos, r; - Real vx, vy, vz, d_0, P_0, P; - - d_0 = 1.0; - P_0 = 1.0e-6; - - // set exact boundaries on the +x face - for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); - else r = sqrt(x_pos*x_pos + y_pos*y_pos); - // set the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (H.nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); - else C.density[id] = d_0*(1.0 + H.t/r); - C.momentum_x[id] = vx*C.density[id]; - C.momentum_y[id] = vy*C.density[id]; - C.momentum_z[id] = vz*C.density[id]; - C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; - - } - } - } - - // set exact boundaries on the +y face - for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); - else r = sqrt(x_pos*x_pos + y_pos*y_pos); - // set the velocities - vx = -x_pos / r; - vy = -y_pos / r; - if (H.nz > 1) vz = -z_pos / r; - else vz = 0; - // set the conserved quantities - if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); - else C.density[id] = d_0*(1.0 + H.t/r); - C.momentum_x[id] = vx*C.density[id]; - C.momentum_y[id] = vy*C.density[id]; - C.momentum_z[id] = vz*C.density[id]; - C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; - +#endif + + Noh_Boundary_CUDA(C.device, H.nx, H.ny, H.nz, H.n_cells, H.n_ghost, x_off, y_off, z_off, H.dx, H.dy, H.dz, H.xbound, + H.ybound, H.zbound, gama, H.t); 
+ + /* + int i, j, k, id; + Real x_pos, y_pos, z_pos, r; + Real vx, vy, vz, d_0, P_0, P; + d_0 = 1.0; + P_0 = 1.0e-6; + // set exact boundaries on the +x face + for (k=0; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); + else r = sqrt(x_pos*x_pos + y_pos*y_pos); + // set the velocities + vx = -x_pos / r; + vy = -y_pos / r; + if (H.nz > 1) vz = -z_pos / r; + else vz = 0; + // set the conserved quantities + if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + else C.density[id] = d_0*(1.0 + H.t/r); + C.momentum_x[id] = vx*C.density[id]; + C.momentum_y[id] = vy*C.density[id]; + C.momentum_z[id] = vz*C.density[id]; + C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; + } } } - } - - // set exact boundaries on the +z face - if (H.nz > 1) { - - for (k=H.nz-H.n_ghost; k 1) r = sqrt(x_pos*x_pos + y_pos*y_pos+ z_pos*z_pos); + else r = sqrt(x_pos*x_pos + y_pos*y_pos); // set the velocities vx = -x_pos / r; vy = -y_pos / r; - vz = -z_pos / r; + if (H.nz > 1) vz = -z_pos / r; + else vz = 0; // set the conserved quantities - C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + if (H.nz > 1) C.density[id] = d_0*(1.0 + H.t/r)*(1.0 + H.t/r); + else C.density[id] = d_0*(1.0 + H.t/r); C.momentum_x[id] = vx*C.density[id]; C.momentum_y[id] = vy*C.density[id]; C.momentum_z[id] = vz*C.density[id]; C.Energy[id] = P_0/(gama-1.0) + 0.5*C.density[id]; - } } } - - } -*/ + // set exact boundaries on the +z face + if (H.nz > 1) { + for (k=H.nz-H.n_ghost; k= buffer_ncells){ + if (id >= buffer_ncells) { return; } - k = id/(isize*jsize); - j = (id - k*isize*jsize)/isize; - i = id - k*isize*jsize - j*isize; - idx = i + (j+k*ny)*nx + idxoffset; + k = id / (isize * jsize); + j = (id - k * isize * jsize) / isize; + i = id - k * isize * jsize - j * isize; + idx = i + (j + k * ny) * nx + idxoffset; // idxoffset contains offset terms from // idx = (i+ioffset) + (j+joffset)*H.nx + (k+koffset)*H.nx*H.ny; - for (ii=0; ii= buffer_ncells){ + if (id >= buffer_ncells) { return; } - k = 
id/(isize*jsize); - j = (id - k*isize*jsize)/isize; - i = id - k*isize*jsize - j*isize; - idx = i + (j+k*ny)*nx + idxoffset; - for (ii=0; ii=isize*jsize*ksize){ + k = id / (isize * jsize); + j = (id - k * isize * jsize) / isize; + i = id - k * isize * jsize - j * isize; + if (id >= isize * jsize * ksize) { return; } // true i,j,k conversion i += imin; j += jmin; k += kmin; - gidx = i + j*nx + k*nx*ny; + gidx = i + j * nx + k * nx * ny; // calculate idx (index of real cell) and a[:] for reflection - idx = SetBoundaryMapping(i,j,k,&a[0],flags,nx,ny,nz,n_ghost,magneticIdx); - - if (idx>=0){ - for (ii=0; ii= 0) { + for (ii = 0; ii < n_fields; ii++) { + c_head[gidx + ii * n_cells] = c_head[idx + ii * n_cells]; } // momentum correction for reflection - // these are set to -1 whenever ghost cells in a direction are in a reflective boundary condition - if (flags[0]==2 || flags[1]==2){ + // these are set to -1 whenever ghost cells in a direction are in a + // reflective boundary condition + if (flags[0] == 2 || flags[1] == 2) { c_head[gidx + n_cells] *= a[0]; } - if (flags[2]==2 || flags[3]==2){ - c_head[gidx + 2*n_cells] *= a[1]; + if (flags[2] == 2 || flags[3] == 2) { + c_head[gidx + 2 * n_cells] *= a[1]; } - if (flags[4]==2 || flags[5]==2){ - c_head[gidx + 3*n_cells] *= a[2]; + if (flags[4] == 2 || flags[5] == 2) { + c_head[gidx + 3 * n_cells] *= a[2]; } + +#ifndef MHD // energy and momentum correction for transmission // Diode: only allow outflow - if (flags[dir] == 3){ + if (flags[dir] == 3) { // - int momdex = gidx + (dir/2+1)*n_cells; + int momdex = gidx + (dir / 2 + 1) * n_cells; // (X) Dir 0,1 -> Mom 1 -> c_head[gidx+1*n_cells] // (Y) Dir 2,3 -> Mom 2 -> c_head[gidx+2*n_cells] // (Z) Dir 4,5 -> Mom 3 -> c_head[gidx+3*n_cells] // If a momentum is set to 0, subtract its kinetic energy [gidx+4*n_cells] - if (dir%2 == 0){ - // Direction 0,2,4 are left-side, don't allow inflow with positive momentum - if (c_head[momdex] > 0.0) { - c_head[gidx+4*n_cells] -= 
0.5*(c_head[momdex]*c_head[momdex])/c_head[gidx]; - c_head[momdex] = 0.0; - } + if (dir % 2 == 0) { + // Direction 0,2,4 are left-side, don't allow inflow with positive + // momentum + if (c_head[momdex] > 0.0) { + c_head[gidx + 4 * n_cells] -= 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; + c_head[momdex] = 0.0; + } } else { - // Direction 1,3,5 are right-side, don't allow inflow with negative momentum - if (c_head[momdex] < 0.0) { - c_head[gidx+4*n_cells] -= 0.5*(c_head[momdex]*c_head[momdex])/c_head[gidx]; - c_head[momdex] = 0.0; - } + // Direction 1,3,5 are right-side, don't allow inflow with negative + // momentum + if (c_head[momdex] < 0.0) { + c_head[gidx + 4 * n_cells] -= 0.5 * (c_head[momdex] * c_head[momdex]) / c_head[gidx]; + c_head[momdex] = 0.0; + } } - }//end energy correction for transmissive boundaries - }//end idx>=0 -}//end function - -void SetGhostCells(Real * c_head, - int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], - int isize, int jsize, int ksize, - int imin, int jmin, int kmin, int dir) + } // end energy correction for transmissive boundaries +#endif // not MHD + } // end idx>=0 +} // end function + +void SetGhostCells(Real *c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize, + int jsize, int ksize, int imin, int jmin, int kmin, int dir) { - dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(SetGhostCellsKernel,dim1dGrid,dim1dBlock,0,0,c_head, - nx,ny,nz,n_fields,n_cells,n_ghost, - flags[0],flags[1],flags[2],flags[3],flags[4],flags[5], - isize,jsize,ksize,imin,jmin,kmin,dir); - + hipLaunchKernelGGL(SetGhostCellsKernel, dim1dGrid, dim1dBlock, 0, 0, c_head, nx, ny, nz, n_fields, n_cells, n_ghost, + flags[0], flags[1], flags[2], flags[3], flags[4], flags[5], isize, jsize, ksize, imin, jmin, kmin, + dir); } -__device__ int SetBoundaryMapping(int ig, 
int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost, int &magneticIdx){ +__device__ int SetBoundaryMapping(int ig, int jg, int kg, Real *a, int flags[], int nx, int ny, int nz, int n_ghost) +{ // nx, ny, nz, n_ghost /* 1D */ - // irMag, jrMag, krMag are the magnetic indices - int ir, jr, kr, irMag, jrMag, krMag, idx; - ir=jr=kr=irMag=jrMag=krMag=idx=magneticIdx=0; - if (nx>1) { - + int ir, jr, kr, idx; + ir = jr = kr = idx = 0; + if (nx > 1) { // set index on -x face if (ig < n_ghost) { - ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0], irMag); + ir = FindIndex(ig, nx, flags[0], 0, n_ghost, &a[0]); } // set index on +x face - else if (ig >= nx-n_ghost) { - ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0], irMag); + else if (ig >= nx - n_ghost) { + ir = FindIndex(ig, nx, flags[1], 1, n_ghost, &a[0]); } // set i index for multi-D problems else { ir = ig; - #ifdef MHD - irMag = ig; - #endif //MHD } // if custom x boundaries are needed, set index to -1 and return if (ir < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add i index to ghost cell mapping idx += ir; - #ifdef MHD - magneticIdx += irMag; - #endif //MHD - } /* 2D */ if (ny > 1) { - // set index on -y face if (jg < n_ghost) { - jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1], jrMag); + jr = FindIndex(jg, ny, flags[2], 0, n_ghost, &a[1]); } // set index on +y face - else if (jg >= ny-n_ghost) { - jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1], jrMag); + else if (jg >= ny - n_ghost) { + jr = FindIndex(jg, ny, flags[3], 1, n_ghost, &a[1]); } // set j index for multi-D problems else { jr = jg; - #ifdef MHD - jrMag = jg; - #endif //MHD } // if custom y boundaries are needed, set index to -1 and return if (jr < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add j index to ghost cell mapping - idx += nx*jr; - #ifdef MHD - magneticIdx += nx*jrMag; - #endif //MHD - + idx += nx * jr; } /* 3D */ if (nz > 1) { - 
// set index on -z face if (kg < n_ghost) { - kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2], krMag); + kr = FindIndex(kg, nz, flags[4], 0, n_ghost, &a[2]); } // set index on +z face - else if (kg >= nz-n_ghost) { - kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2], krMag); + else if (kg >= nz - n_ghost) { + kr = FindIndex(kg, nz, flags[5], 1, n_ghost, &a[2]); } // set k index for multi-D problems else { kr = kg; - #ifdef MHD - krMag = kg; - #endif //MHD } // if custom z boundaries are needed, set index to -1 and return if (kr < 0) { - #ifdef MHD - magneticIdx = -1; - #endif //MHD return idx = -1; } // otherwise add k index to ghost cell mapping - idx += nx*ny*kr; - #ifdef MHD - magneticIdx += nx*ny*krMag; - #endif //MHD + idx += nx * ny * kr; } return idx; } -__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a, int &idMag){ +__device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real *a) +{ int id; // lower face - if (face==0) - { - switch(flag) - { + if (face == 0) { + switch (flag) { // periodic case 1: - id = ig+nx-2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD + id = ig + nx - 2 * n_ghost; break; // reflective case 2: - id = 2*n_ghost-ig-1; + id = 2 * n_ghost - ig - 1; *(a) = -1.0; - #ifdef MHD - idMag = id - 1; - #endif //MHD break; // transmissive case 3: id = n_ghost; - #ifdef MHD - idMag = id - 1; - #endif //MHD break; // custom case 4: id = -1; - #ifdef MHD - idMag = -1; - #endif //MHD break; // MPI case 5: id = ig; - #ifdef MHD - idMag = id; - #endif //MHD break; // default is periodic default: - id = ig+nx-2*n_ghost; - #ifdef MHD - idMag = id; - #endif //MHD + id = ig + nx - 2 * n_ghost; } } // upper face - else - { - switch(flag) - { + else { + switch (flag) { // periodic case 1: - id = ig-nx+2*n_ghost; + id = ig - nx + 2 * n_ghost; break; // reflective case 2: - id = 2*(nx-n_ghost)-ig-1; + id = 2 * (nx - n_ghost) - ig - 1; *(a) = -1.0; - break; + break; // transmissive case 3: - id = 
nx-n_ghost-1; + id = nx - n_ghost - 1; break; // custom case 4: @@ -347,27 +296,63 @@ __device__ int FindIndex(int ig, int nx, int flag, int face, int n_ghost, Real * break; // default is periodic default: - id = ig-nx+2*n_ghost; + id = ig - nx + 2 * n_ghost; } - #ifdef MHD - idMag = id; - #endif //MHD } return id; } +__global__ void Wind_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, + int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, + Real zbound, Real gamma, Real t) +{ + int id, xid, yid, zid, gid; + Real n_0, T_0; + Real mu = 0.6; + Real vx, vy, vz, d_0, P_0; + + n_0 = 1e-2; // same value as n_bg in cloud initial condition function (cm^-3) + T_0 = 3e6; // same value as T_bg in cloud initial condition function (K) + + // same values as rho_bg and p_bg in cloud initial condition function + d_0 = n_0 * mu * MP / DENSITY_UNIT; + P_0 = n_0 * KB * T_0 / PRESSURE_UNIT; + + vx = 100 * TIME_UNIT / KPC; // km/s * (cholla unit conversion) + vy = 0.0; + vz = 0.0; -__global__ void Noh_Boundary_kernel(Real * c_device, - int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, - Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) + // calculate ghost cell ID and i,j,k in GPU grid + id = threadIdx.x + blockIdx.x * blockDim.x; + + // not true i,j,k but relative i,j,k in the GPU grid + cuda_utilities::compute3DIndices(id, n_ghost, ny, xid, yid, zid); + + // map thread id to ghost cell id + xid += 0; // -x boundary + gid = xid + yid * nx + zid * nx * ny; + + if (xid <= n_ghost && xid < nx && yid < ny && zid < nz) { + // set conserved variables + c_device[gid] = d_0; + c_device[gid + 1 * n_cells] = vx * d_0; + c_device[gid + 2 * n_cells] = vy * d_0; + c_device[gid + 3 * n_cells] = vz * d_0; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * d_0 * (vx * vx + vy * vy + vz * vz); + } + __syncthreads(); +} + +__global__ void 
Noh_Boundary_kernel(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, + int y_off, int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, + Real zbound, Real gamma, Real t) { - int id,xid,yid,zid,gid; + int id, xid, yid, zid, gid; Real x_pos, y_pos, z_pos, r; Real vx, vy, vz, d_0, P_0; d_0 = 1.0; - P_0 = 1.0e-6; + P_0 = 1.0e-6; // calculate ghost cell ID and i,j,k in GPU grid id = threadIdx.x + blockIdx.x * blockDim.x; @@ -381,40 +366,48 @@ __global__ void Noh_Boundary_kernel(Real * c_device, // ksize = nz; // not true i,j,k but relative i,j,k in the GPU grid - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; + zid = id / (isize * jsize); + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - xid += nx-n_ghost; // +x boundary - gid = xid + yid*nx + zid*nx*ny; - - if (xid >= nx-n_ghost && xid < nx && yid < ny && zid < nz) { + xid += nx - n_ghost; // +x boundary + gid = xid + yid * nx + zid * nx * ny; - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + if (xid >= nx - n_ghost && xid < nx && yid < ny && zid < nz) { + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); - // for 3D calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + if (nz == 1) { + r = sqrt(x_pos * x_pos + y_pos * y_pos); + // for 3D calculate spherical r + } else { + r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + } 
// calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; + if (nz > 1) { + vz = -z_pos / r; + } else { + vz = 0; + } // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; + if (nz > 1) { + c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); + } else { + c_device[gid] = d_0 * (1.0 + t / r); + } + c_device[gid + 1 * n_cells] = vx * c_device[gid]; + c_device[gid + 2 * n_cells] = vy * c_device[gid]; + c_device[gid + 3 * n_cells] = vz * c_device[gid]; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid]; } - __syncthreads(); // +y boundary next isize = nx; @@ -422,90 +415,107 @@ __global__ void Noh_Boundary_kernel(Real * c_device, // ksize = nz; // not true i,j,k but relative i,j,k - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; + zid = id / (isize * jsize); + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - yid += ny-n_ghost; // +y boundary - gid = xid + yid*nx + zid*nx*ny; + yid += ny - n_ghost; // +y boundary + gid = xid + yid * nx + zid * nx * ny; - if (xid < nx && yid >= ny-n_ghost && yid < ny && zid < nz) { - - // use the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + if (xid < nx && yid >= ny - n_ghost && yid < ny && zid < nz) { + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = 
(y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); - // for 3D, calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + if (nz == 1) { + r = sqrt(x_pos * x_pos + y_pos * y_pos); + // for 3D, calculate spherical r + } else { + r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + } // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; + if (nz > 1) { + vz = -z_pos / r; + } else { + vz = 0; + } // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } - __syncthreads(); + if (nz > 1) { + c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); + } else { + c_device[gid] = d_0 * (1.0 + t / r); + } + c_device[gid + 1 * n_cells] = vx * c_device[gid]; + c_device[gid + 2 * n_cells] = vy * c_device[gid]; + c_device[gid + 3 * n_cells] = vz * c_device[gid]; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid]; + } + __syncthreads(); // +z boundary last (only if 3D) - if (nz == 1) return; + if (nz == 1) { + return; + } isize = nx; jsize = ny; // ksize = n_ghost; // not true i,j,k but relative i,j,k - zid = id/(isize*jsize); - yid = (id - zid*isize*jsize)/isize; - xid = id - zid*isize*jsize - yid*isize; + zid = id / (isize * jsize); + yid = (id - zid * isize * jsize) / isize; + xid = id - zid * isize * jsize - yid * isize; // map thread id to ghost cell id - zid += nz-n_ghost; // +z boundary - gid = xid + yid*nx + zid*nx*ny; - - if (xid < nx && yid < ny && zid >= nz-n_ghost && zid < nz) { + zid += nz - n_ghost; // +z boundary + gid = xid + yid * nx + zid * nx * ny; - // use 
the subgrid offset and global boundaries to calculate absolute positions on the grid - x_pos = (x_off + xid - n_ghost + 0.5)*dx + xbound; - y_pos = (y_off + yid - n_ghost + 0.5)*dy + ybound; - z_pos = (z_off + zid - n_ghost + 0.5)*dz + zbound; + if (xid < nx && yid < ny && zid >= nz - n_ghost && zid < nz) { + // use the subgrid offset and global boundaries to calculate absolute + // positions on the grid + x_pos = (x_off + xid - n_ghost + 0.5) * dx + xbound; + y_pos = (y_off + yid - n_ghost + 0.5) * dy + ybound; + z_pos = (z_off + zid - n_ghost + 0.5) * dz + zbound; // for 2D calculate polar r - if (nz == 1) r = sqrt(x_pos*x_pos + y_pos*y_pos); - // for 3D, calculate spherical r - else r = sqrt(x_pos*x_pos + y_pos*y_pos + z_pos*z_pos); + if (nz == 1) { + r = sqrt(x_pos * x_pos + y_pos * y_pos); + // for 3D, calculate spherical r + } else { + r = sqrt(x_pos * x_pos + y_pos * y_pos + z_pos * z_pos); + } // calculate the velocities vx = -x_pos / r; vy = -y_pos / r; - if (nz > 1) vz = -z_pos / r; - else vz = 0; + if (nz > 1) { + vz = -z_pos / r; + } else { + vz = 0; + } // set the conserved quantities - if (nz > 1) c_device[gid] = d_0*(1.0 + t/r)*(1.0 + t/r); - else c_device[gid] = d_0*(1.0 + t/r); - c_device[gid+1*n_cells] = vx*c_device[gid]; - c_device[gid+2*n_cells] = vy*c_device[gid]; - c_device[gid+3*n_cells] = vz*c_device[gid]; - c_device[gid+4*n_cells] = P_0/(gamma-1.0) + 0.5*c_device[gid]; - } + if (nz > 1) { + c_device[gid] = d_0 * (1.0 + t / r) * (1.0 + t / r); + } else { + c_device[gid] = d_0 * (1.0 + t / r); + } + c_device[gid + 1 * n_cells] = vx * c_device[gid]; + c_device[gid + 2 * n_cells] = vy * c_device[gid]; + c_device[gid + 3 * n_cells] = vz * c_device[gid]; + c_device[gid + 4 * n_cells] = P_0 / (gamma - 1.0) + 0.5 * c_device[gid]; + } } - -void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t) 
+void Wind_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) { - // determine the size of the grid to launch // need at least as many threads as the largest boundary face // current implementation assumes the test is run on a cube... @@ -514,16 +524,29 @@ void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int jsize = ny; ksize = nz; - dim3 dim1dGrid((isize*jsize*ksize+TPB-1)/TPB, 1, 1); + dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); // launch the boundary kernel - hipLaunchKernelGGL(Noh_Boundary_kernel,dim1dGrid,dim1dBlock,0,0,c_device, - nx,ny,nz,n_cells,n_ghost, - x_off,y_off,z_off,dx,dy,dz,xbound,ybound,zbound,gamma,t); - - - + hipLaunchKernelGGL(Wind_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off, + y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); } +void Noh_Boundary_CUDA(Real *c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t) +{ + // determine the size of the grid to launch + // need at least as many threads as the largest boundary face + // current implementation assumes the test is run on a cube... 
+ int isize, jsize, ksize; + isize = n_ghost; + jsize = ny; + ksize = nz; + + dim3 dim1dGrid((isize * jsize * ksize + TPB - 1) / TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + // launch the boundary kernel + hipLaunchKernelGGL(Noh_Boundary_kernel, dim1dGrid, dim1dBlock, 0, 0, c_device, nx, ny, nz, n_cells, n_ghost, x_off, + y_off, z_off, dx, dy, dz, xbound, ybound, zbound, gamma, t); +} \ No newline at end of file diff --git a/src/grid/cuda_boundaries.h b/src/grid/cuda_boundaries.h index f7212401a..bbf0a5ab8 100644 --- a/src/grid/cuda_boundaries.h +++ b/src/grid/cuda_boundaries.h @@ -1,21 +1,23 @@ -#ifdef CUDA -#include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" +#include "../utils/gpu.hpp" -//void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); -void PackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize); +// void PackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int +// ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); +void PackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, + int jsize, int ksize); -void UnpackBuffers3D(Real * buffer, Real * c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, int jsize, int ksize); -//void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); +void UnpackBuffers3D(Real* buffer, Real* c_head, int nx, int ny, int n_fields, int n_cells, int idxoffset, int isize, + int jsize, int ksize); +// void UnpackBuffers3D(Real * buffer, Real * c_head, int isize, int jsize, int +// ksize, int nx, int ny, int idxoffset, int offset, int n_fields, int n_cells); -void SetGhostCells(Real * c_head, - int nx, int 
ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], - int isize, int jsize, int ksize, - int imin, int jmin, int kmin, int dir); +void SetGhostCells(Real* c_head, int nx, int ny, int nz, int n_fields, int n_cells, int n_ghost, int flags[], int isize, + int jsize, int ksize, int imin, int jmin, int kmin, int dir); -void Noh_Boundary_CUDA(Real * c_device, int nx, int ny, int nz, int n_cells, int n_ghost, - int x_off, int y_off, int z_off, Real dx, Real dy, Real dz, - Real xbound, Real ybound, Real zbound, Real gamma, Real t); +void Wind_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, + Real t); -#endif +void Noh_Boundary_CUDA(Real* c_device, int nx, int ny, int nz, int n_cells, int n_ghost, int x_off, int y_off, + int z_off, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real gamma, Real t); diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 03c1dc7c1..ef4d57928 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -1,43 +1,46 @@ /*! 
\file grid3D.cpp * \brief Definitions of the Grid3D class */ -#include #include +#include #include #ifdef HDF5 -#include + #include #endif #include "../global/global.h" #include "../grid/grid3D.h" -#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU +#include "../grid/grid_enum.h" // provides grid_enum +#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU #include "../integrators/VL_1D_cuda.h" #include "../integrators/VL_2D_cuda.h" #include "../integrators/VL_3D_cuda.h" -#include "../io/io.h" -#include "../utils/error_handling.h" -#include "../utils/ran.h" #include "../integrators/simple_1D_cuda.h" #include "../integrators/simple_2D_cuda.h" #include "../integrators/simple_3D_cuda.h" +#include "../io/io.h" +#include "../utils/error_handling.h" #ifdef MPI_CHOLLA -#include -#ifdef HDF5 -#include -#endif -#include "../mpi/mpi_routines.h" + #include + #ifdef HDF5 + #include + #endif + #include "../mpi/mpi_routines.h" #endif #include #ifdef CLOUDY_COOL -#include "../cooling/load_cloudy_texture.h" // provides Load_Cuda_Textures and Free_Cuda_Textures + #include "../cooling/load_cloudy_texture.h" // provides Load_Cuda_Textures and Free_Cuda_Textures #endif #ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" + #include "../utils/parallel_omp.h" #endif #ifdef COOLING_GPU -#include "../cooling/cooling_cuda.h" // provides Cooling_Update + #include "../cooling/cooling_cuda.h" // provides Cooling_Update #endif +#ifdef DUST + #include "../dust/dust_cuda.h" // provides Dust_Update +#endif /*! \fn Grid3D(void) * \brief Constructor for the Grid. 
*/ @@ -46,115 +49,137 @@ Grid3D::Grid3D(void) // set initialization flag to 0 flag_init = 0; - // set number of ghost cells - #ifdef PCM +// set number of ghost cells +#ifdef PCM H.n_ghost = 2; - #endif //PCM - #ifdef PLMP +#endif // PCM +#ifdef PLMP H.n_ghost = 3; - #endif //PLMP - #ifdef PLMC +#endif // PLMP +#ifdef PLMC H.n_ghost = 3; - #endif //PLMC - #ifdef PPMP +#endif // PLMC +#ifdef PPMP + H.n_ghost = 4; +#endif // PPMP +#ifdef PPMC H.n_ghost = 4; - #endif //PPMP - #ifdef PPMC - H.n_ghost=4; - #endif //PPMC +#endif // PPMC - #ifdef GRAVITY +#ifdef GRAVITY H.n_ghost_potential_offset = H.n_ghost - N_GHOST_POTENTIAL; - #endif +#endif +#ifdef MHD + // Set the number of ghost cells high enough for MHD. MHD needs one extra for the left most face + H.n_ghost++; +#endif // MHD } -/*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos) - * \brief Get the cell-centered position based on cell index */ +/*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real + * *zpos) \brief Get the cell-centered position based on cell index */ void Grid3D::Get_Position(long i, long j, long k, Real *x_pos, Real *y_pos, Real *z_pos) { +#ifndef MPI_CHOLLA -#ifndef MPI_CHOLLA - - *x_pos = H.xbound + H.dx*(i-H.n_ghost) + 0.5*H.dx; - *y_pos = H.ybound + H.dy*(j-H.n_ghost) + 0.5*H.dy; - *z_pos = H.zbound + H.dz*(k-H.n_ghost) + 0.5*H.dz; + *x_pos = H.xbound + H.dx * (i - H.n_ghost) + 0.5 * H.dx; + *y_pos = H.ybound + H.dy * (j - H.n_ghost) + 0.5 * H.dy; + *z_pos = H.zbound + H.dz * (k - H.n_ghost) + 0.5 * H.dz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* position relative to local xyz bounds */ - /* This approach was replaced because it is less consistent for multiple cores. - Since distributive property does not perfectly hold for floating point operations + /* This approach was replaced because it is less consistent for multiple + cores. 
Since distributive property does not perfectly hold for floating point + operations > Global_bound + global_i * dx is more consistent than - >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) + local_i*dx. + >local_bound + local_i*dx = (global_bound + (global_i-local_i)*dx) + + local_i*dx. *x_pos = H.xblocal + H.dx*(i-H.n_ghost) + 0.5*H.dx; *y_pos = H.yblocal + H.dy*(j-H.n_ghost) + 0.5*H.dy; *z_pos = H.zblocal + H.dz*(k-H.n_ghost) + 0.5*H.dz; */ - *x_pos = H.xbound + (nx_local_start+i-H.n_ghost)*H.dx + 0.5*H.dx; - *y_pos = H.ybound + (ny_local_start+j-H.n_ghost)*H.dy + 0.5*H.dy; - *z_pos = H.zbound + (nz_local_start+k-H.n_ghost)*H.dz + 0.5*H.dz; - -#endif /*MPI_CHOLLA*/ + *x_pos = H.xbound + (nx_local_start + i - H.n_ghost) * H.dx + 0.5 * H.dx; + *y_pos = H.ybound + (ny_local_start + j - H.n_ghost) * H.dy + 0.5 * H.dy; + *z_pos = H.zbound + (nz_local_start + k - H.n_ghost) * H.dz + 0.5 * H.dz; +#endif /*MPI_CHOLLA*/ } +Real Grid3D::Calc_Inverse_Timestep() +{ + // ==Calculate the next inverse time step using Calc_dt_GPU from + // hydro/hydro_cuda.h== + return Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama); +} /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) * \brief Initialize the grid. */ -void Grid3D::Initialize(struct parameters *P) +void Grid3D::Initialize(struct Parameters *P) { // number of fields to track (default 5 is # of conserved variables) H.n_fields = 5; - // if including passive scalars increase the number of fields - #ifdef SCALAR +// if including passive scalars increase the number of fields +#ifdef SCALAR H.n_fields += NSCALARS; - #endif +#endif - // if including magnetic fields increase the number of fields - #ifdef MHD +// if including magnetic fields increase the number of fields +#ifdef MHD H.n_fields += 3; - #endif //MHD +#endif // MHD - // if using dual energy formalism must track internal energy - always the last field! 
- #ifdef DE +// if using dual energy formalism must track internal energy - always the last +// field! +#ifdef DE H.n_fields++; - #endif +#endif int nx_in = P->nx; int ny_in = P->ny; int nz_in = P->nz; +#ifdef STATIC_GRAV + H.custom_grav = P->custom_grav; // Initialize the custom static gravity flag + if (H.custom_grav == 0) { + printf("WARNING: No custom gravity field given. Gravity field will be set to zero.\n"); + } +#endif + // Set the CFL coefficient (a global variable) C_cfl = 0.3; - - #ifdef AVERAGE_SLOW_CELLS - H.min_dt_slow = 1e-100; //Initialize the minumum dt to a tiny number - #endif + +#ifdef AVERAGE_SLOW_CELLS + H.min_dt_slow = 1e-100; // Initialize the minumum dt to a tiny number +#endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA // set grid dimensions - H.nx = nx_in+2*H.n_ghost; + H.nx = nx_in + 2 * H.n_ghost; H.nx_real = nx_in; - if (ny_in == 1) H.ny = 1; - else H.ny = ny_in+2*H.n_ghost; + if (ny_in == 1) + H.ny = 1; + else + H.ny = ny_in + 2 * H.n_ghost; H.ny_real = ny_in; - if (nz_in == 1) H.nz = 1; - else H.nz = nz_in+2*H.n_ghost; + if (nz_in == 1) + H.nz = 1; + else + H.nz = nz_in + 2 * H.n_ghost; H.nz_real = nz_in; // set total number of cells H.n_cells = H.nx * H.ny * H.nz; -#else /*MPI_CHOLLA*/ +#else /*MPI_CHOLLA*/ /* perform domain decomposition * and set grid dimensions @@ -164,20 +189,16 @@ void Grid3D::Initialize(struct parameters *P) #endif /*MPI_CHOLLA*/ // failsafe - if(H.n_cells<=0) - { + if (H.n_cells <= 0) { chprintf("Error initializing grid: H.n_cells = %d\n", H.n_cells); chexit(-1); } // check for initialization - if(flag_init) - { + if (flag_init) { chprintf("Already initialized. 
Please reset.\n"); return; - } - else - { + } else { // mark that we are initializing flag_init = 1; } @@ -191,399 +212,398 @@ void Grid3D::Initialize(struct parameters *P) // and initialize the timestep H.dt = 0.0; - // Set Transfer flag to false, only set to true before Conserved boundaries are transferred + // Set Transfer flag to false, only set to true before Conserved boundaries + // are transferred H.TRANSFER_HYDRO_BOUNDARIES = false; // Set output to true when data has to be written to file; H.Output_Now = false; - // allocate memory AllocateMemory(); - #ifdef ROTATED_PROJECTION - //x-dir pixels in projection + // x-dir pixels in projection R.nx = P->nxr; - //z-dir pixels in projection + // z-dir pixels in projection R.nz = P->nzr; - //minimum x location to project + // minimum x location to project R.nx_min = 0; - //minimum z location to project + // minimum z location to project R.nz_min = 0; - //maximum x location to project + // maximum x location to project R.nx_max = R.nx; - //maximum z location to project + // maximum z location to project R.nz_max = R.nz; - //rotation angle about z direction - R.delta = M_PI*(P->delta/180.); //convert to radians - //rotation angle about x direction - R.theta = M_PI*(P->theta/180.); //convert to radians - //rotation angle about y direction - R.phi = M_PI*(P->phi/180.); //convert to radians - //x-dir physical size of projection + // rotation angle about z direction + R.delta = M_PI * (P->delta / 180.); // convert to radians + // rotation angle about x direction + R.theta = M_PI * (P->theta / 180.); // convert to radians + // rotation angle about y direction + R.phi = M_PI * (P->phi / 180.); // convert to radians + // x-dir physical size of projection R.Lx = P->Lx; - //z-dir physical size of projection + // z-dir physical size of projection R.Lz = P->Lz; - //initialize a counter for rotated outputs + // initialize a counter for rotated outputs R.i_delta = 0; - //number of rotated outputs in a complete revolution + // 
number of rotated outputs in a complete revolution R.n_delta = P->n_delta; - //rate of rotation between outputs, for an actual simulation + // rate of rotation between outputs, for an actual simulation R.ddelta_dt = P->ddelta_dt; - //are we not rotating about z(0)? - //are we outputting multiple rotations(1)? or rotating during a simulation(2)? + // are we not rotating about z(0)? + // are we outputting multiple rotations(1)? or rotating during a + // simulation(2)? R.flag_delta = P->flag_delta; #endif /*ROTATED_PROJECTION*/ - // Values for lower limit for density and temperature - #ifdef DENSITY_FLOOR - H.density_floor = DENS_FLOOR; - #else - H.density_floor = 0.0; - #endif +// Values for lower limit for density and temperature +#ifdef TEMPERATURE_FLOOR + H.temperature_floor = P->temperature_floor; +#endif - #ifdef TEMPERATURE_FLOOR - H.temperature_floor = TEMP_FLOOR; - #else - H.temperature_floor = 0.0; - #endif +#ifdef DENSITY_FLOOR + H.density_floor = P->density_floor; +#endif - #ifdef COSMOLOGY - if ( P->scale_outputs_file[0] == '\0' ) H.OUTPUT_SCALE_FACOR = false; - else H.OUTPUT_SCALE_FACOR = true; - #endif +#ifdef SCALAR_FLOOR + H.scalar_floor = P->scalar_floor; +#endif - H.Output_Initial = true; +#ifdef COSMOLOGY + H.OUTPUT_SCALE_FACOR = not(P->scale_outputs_file[0] == '\0'); +#endif +#ifdef SCALAR + #ifdef DUST + H.grain_radius = P->grain_radius; + #endif +#endif + H.Output_Initial = true; } - /*! \fn void AllocateMemory(void) * \brief Allocate memory for the arrays. 
*/ void Grid3D::AllocateMemory(void) { // allocate memory for the conserved variable arrays // allocate all the memory to density, to insure contiguous memory - CudaSafeCall( cudaHostAlloc((void**)&C.host, H.n_fields*H.n_cells*sizeof(Real), cudaHostAllocDefault) ); + GPU_Error_Check(cudaHostAlloc((void **)&C.host, H.n_fields * H.n_cells * sizeof(Real), cudaHostAllocDefault)); // point conserved variables to the appropriate locations - C.density = C.host; - C.momentum_x = &(C.host[H.n_cells]); - C.momentum_y = &(C.host[2*H.n_cells]); - C.momentum_z = &(C.host[3*H.n_cells]); - C.Energy = &(C.host[4*H.n_cells]); - #ifdef SCALAR - C.scalar = &(C.host[5*H.n_cells]); - #endif //SCALAR - #ifdef MHD - C.magnetic_x = &(C.host[(5 + NSCALARS)*H.n_cells]); - C.magnetic_y = &(C.host[(6 + NSCALARS)*H.n_cells]); - C.magnetic_z = &(C.host[(7 + NSCALARS)*H.n_cells]); - #endif //MHD - #ifdef DE - C.GasEnergy = &(C.host[(H.n_fields-1)*H.n_cells]); - #endif //DE + C.density = &(C.host[grid_enum::density * H.n_cells]); + C.momentum_x = &(C.host[grid_enum::momentum_x * H.n_cells]); + C.momentum_y = &(C.host[grid_enum::momentum_y * H.n_cells]); + C.momentum_z = &(C.host[grid_enum::momentum_z * H.n_cells]); + C.Energy = &(C.host[grid_enum::Energy * H.n_cells]); +#ifdef SCALAR + C.scalar = &(C.host[H.n_cells * grid_enum::scalar]); + #ifdef BASIC_SCALAR + C.basic_scalar = &(C.host[H.n_cells * grid_enum::basic_scalar]); + #endif + #ifdef DUST + C.dust_density = &(C.host[H.n_cells * grid_enum::dust_density]); + #endif +#endif // SCALAR +#ifdef MHD + C.magnetic_x = &(C.host[grid_enum::magnetic_x * H.n_cells]); + C.magnetic_y = &(C.host[grid_enum::magnetic_y * H.n_cells]); + C.magnetic_z = &(C.host[grid_enum::magnetic_z * H.n_cells]); +#endif // MHD +#ifdef DE + C.GasEnergy = &(C.host[(H.n_fields - 1) * H.n_cells]); +#endif // DE // allocate memory for the conserved variable arrays on the device - CudaSafeCall( cudaMalloc((void**)&C.device, H.n_fields*H.n_cells*sizeof(Real)) ); + 
GPU_Error_Check(cudaMalloc((void **)&C.device, H.n_fields * H.n_cells * sizeof(Real))); + cuda_utilities::initGpuMemory(C.device, H.n_fields * H.n_cells * sizeof(Real)); C.d_density = C.device; C.d_momentum_x = &(C.device[H.n_cells]); - C.d_momentum_y = &(C.device[2*H.n_cells]); - C.d_momentum_z = &(C.device[3*H.n_cells]); - C.d_Energy = &(C.device[4*H.n_cells]); - #ifdef SCALAR - C.d_scalar = &(C.device[5*H.n_cells]); - #endif // SCALAR - #ifdef MHD - C.d_magnetic_x = &(C.device[(5 + NSCALARS)*H.n_cells]); - C.d_magnetic_y = &(C.device[(6 + NSCALARS)*H.n_cells]); - C.d_magnetic_z = &(C.device[(7 + NSCALARS)*H.n_cells]); - #endif //MHD - #ifdef DE - C.d_GasEnergy = &(C.device[(H.n_fields-1)*H.n_cells]); - #endif // DE - - - // arrays that hold the max_dti calculation for hydro for each thread block (pre reduction) - int ngrid = (H.n_cells + TPB - 1) / TPB; - CudaSafeCall( cudaHostAlloc(&host_dti_array, ngrid*sizeof(Real), cudaHostAllocDefault) ); - CudaSafeCall( cudaMalloc((void**)&dev_dti_array, ngrid*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&dev_dti, sizeof(Real)) ); - - - #if defined( GRAVITY ) - CudaSafeCall( cudaHostAlloc(&C.Grav_potential, H.n_cells*sizeof(Real), cudaHostAllocDefault) ); - CudaSafeCall( cudaMalloc((void**)&C.d_Grav_potential, H.n_cells*sizeof(Real)) ); - #else + C.d_momentum_y = &(C.device[2 * H.n_cells]); + C.d_momentum_z = &(C.device[3 * H.n_cells]); + C.d_Energy = &(C.device[4 * H.n_cells]); +#ifdef SCALAR + C.d_scalar = &(C.device[H.n_cells * grid_enum::scalar]); + #ifdef BASIC_SCALAR + C.d_basic_scalar = &(C.device[H.n_cells * grid_enum::basic_scalar]); + #endif + #ifdef DUST + C.d_dust_density = &(C.device[H.n_cells * grid_enum::dust_density]); + #endif +#endif // SCALAR +#ifdef MHD + C.d_magnetic_x = &(C.device[(grid_enum::magnetic_x)*H.n_cells]); + C.d_magnetic_y = &(C.device[(grid_enum::magnetic_y)*H.n_cells]); + C.d_magnetic_z = &(C.device[(grid_enum::magnetic_z)*H.n_cells]); +#endif // MHD +#ifdef DE + C.d_GasEnergy = 
&(C.device[(H.n_fields - 1) * H.n_cells]); +#endif // DE + +#if defined(GRAVITY) + GPU_Error_Check(cudaHostAlloc(&C.Grav_potential, H.n_cells * sizeof(Real), cudaHostAllocDefault)); + GPU_Error_Check(cudaMalloc((void **)&C.d_Grav_potential, H.n_cells * sizeof(Real))); +#else C.Grav_potential = NULL; C.d_Grav_potential = NULL; - #endif - +#endif - #ifdef CHEMISTRY_GPU - C.HI_density = &C.scalar[ 0*H.n_cells ]; - C.HII_density = &C.scalar[ 1*H.n_cells ]; - C.HeI_density = &C.scalar[ 2*H.n_cells ]; - C.HeII_density = &C.scalar[ 3*H.n_cells ]; - C.HeIII_density = &C.scalar[ 4*H.n_cells ]; - C.e_density = &C.scalar[ 5*H.n_cells ]; - #endif +#ifdef CHEMISTRY_GPU + C.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + C.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + C.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + C.e_density = &C.host[H.n_cells * grid_enum::e_density]; +#endif // initialize host array - for (int i=0; i 1 && H.ny == 1 && H.nz == 1) //1D + if (H.nx > 1 && H.ny == 1 && H.nz == 1) // 1D { - #ifdef CUDA - #ifdef VL - VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); - #endif //VL - #ifdef SIMPLE - Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields); - #endif //SIMPLE - #endif //CUDA - } - else if (H.nx > 1 && H.ny > 1 && H.nz == 1) //2D - { - #ifdef CUDA - #ifdef VL - VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); - #endif //VL - #ifdef SIMPLE - Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, H.n_fields); - #endif //SIMPLE - #endif //CUDA - } - else if (H.nx > 1 && H.ny > 1 && H.nz > 1) //3D +#ifdef VL + VL_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, 
H.n_fields, H.custom_grav); +#endif // VL +#ifdef SIMPLE + Simple_Algorithm_1D_CUDA(C.device, H.nx, x_off, H.n_ghost, H.dx, H.xbound, H.dt, H.n_fields, H.custom_grav); +#endif // SIMPLE + } else if (H.nx > 1 && H.ny > 1 && H.nz == 1) // 2D { - #ifdef CUDA - #ifdef VL - VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); - #endif //VL - #ifdef SIMPLE - Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, density_floor, U_floor, C.Grav_potential ); - #endif//SIMPLE - #endif - } - else +#ifdef VL + VL_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, + H.n_fields, H.custom_grav); +#endif // VL +#ifdef SIMPLE + Simple_Algorithm_2D_CUDA(C.device, H.nx, H.ny, x_off, y_off, H.n_ghost, H.dx, H.dy, H.xbound, H.ybound, H.dt, + H.n_fields, H.custom_grav); +#endif // SIMPLE + } else if (H.nx > 1 && H.ny > 1 && H.nz > 1) // 3D { +#ifdef VL + VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, + C.Grav_potential); +#endif // VL +#ifdef SIMPLE + Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, + H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, + C.Grav_potential); +#endif // SIMPLE + } else { chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", H.nx, H.ny, H.nz); chexit(-1); } - - #ifdef CUDA - - #ifdef COOLING_GPU - // ==Apply Cooling from cooling/cooling_cuda.h== - Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); - #endif //COOLING_GPU - - // Update the H 
and He ionization fractions and apply cooling and photoheating - #ifdef CHEMISTRY_GPU - Update_Chemistry(); - #ifdef CPU_TIME - Timer.Chemistry.RecordTime( Chem.H.runtime_chemistry_step ); - #endif - #endif - - #ifdef AVERAGE_SLOW_CELLS - //Set the min_delta_t for averaging a slow cell - Real max_dti_slow; - max_dti_slow = 1 / H.min_dt_slow; - Average_Slow_Cells( C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow ); - #endif //AVERAGE_SLOW_CELLS - - // ==Calculate the next time step with Calc_dt_GPU from hydro/hydro_cuda.h== - max_dti = Calc_dt_GPU(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_cells, H.dx, H.dy, H.dz, gama ); - #endif // CUDA - - #ifdef COOLING_GRACKLE - Cool.fields.density = C.density; - Cool.fields.HI_density = &C.scalar[ 0*H.n_cells ]; - Cool.fields.HII_density = &C.scalar[ 1*H.n_cells ]; - Cool.fields.HeI_density = &C.scalar[ 2*H.n_cells ]; - Cool.fields.HeII_density = &C.scalar[ 3*H.n_cells ]; - Cool.fields.HeIII_density = &C.scalar[ 4*H.n_cells ]; - Cool.fields.e_density = &C.scalar[ 5*H.n_cells ]; - #ifdef GRACKLE_METALS - Cool.fields.metal_density = &C.scalar[ 6*H.n_cells ]; - #endif - #endif - - #ifdef CHEMISTRY_GPU - C.HI_density = &C.scalar[ 0*H.n_cells ]; - C.HII_density = &C.scalar[ 1*H.n_cells ]; - C.HeI_density = &C.scalar[ 2*H.n_cells ]; - C.HeII_density = &C.scalar[ 3*H.n_cells ]; - C.HeIII_density = &C.scalar[ 4*H.n_cells ]; - C.e_density = &C.scalar[ 5*H.n_cells ]; - #endif - - - return max_dti; - +#ifdef CPU_TIME + Timer.Hydro_Integrator.End(true); +#endif // CPU_TIME } /*! \fn void Update_Hydro_Grid(void) * \brief Do all steps to update the hydro. 
*/ -Real Grid3D::Update_Hydro_Grid( ){ - - #ifdef ONLY_PARTICLES +Real Grid3D::Update_Hydro_Grid() +{ +#ifdef ONLY_PARTICLES // Don't integrate the Hydro when only solving for particles return 1e-10; - #endif +#endif // ONLY_PARTICLES - Real dti; - - #ifdef CPU_TIME +#ifdef CPU_TIME Timer.Hydro.Start(); - #endif //CPU_TIME + double non_hydro_elapsed_time = 0.0; +#endif // CPU_TIME - #ifdef GRAVITY +#ifdef GRAVITY // Extrapolate gravitational potential for hydro step Extrapolate_Grav_Potential(); +#endif // GRAVITY + + Execute_Hydro_Integrator(); + +#ifdef TEMPERATURE_FLOOR + // Set the lower limit temperature (Internal Energy) + Real U_floor; + // Minimum of internal energy from minumum of temperature + U_floor = H.temperature_floor * KB / (gama - 1) / MP / SP_ENERGY_UNIT; + #ifdef COSMOLOGY + U_floor = H.temperature_floor / (gama - 1) / MP * KB * 1e-10; // ( km/s )^2 + U_floor /= Cosmo.v_0_gas * Cosmo.v_0_gas / Cosmo.current_a / Cosmo.current_a; #endif + Apply_Temperature_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, U_floor); +#endif // TEMPERATURE_FLOOR - dti = Update_Grid(); +#ifdef SCALAR_FLOOR + #ifdef DUST + Apply_Scalar_Floor(C.device, H.nx, H.ny, H.nz, H.n_ghost, grid_enum::dust_density, H.scalar_floor); + #endif +#endif // SCALAR_FLOOR +// == Perform chemistry/cooling (there are a few different cases) == +#ifdef COOLING_GPU #ifdef CPU_TIME - #ifdef CHEMISTRY_GPU - Timer.Hydro.Subtract(Chem.H.runtime_chemistry_step); - //Subtract the time spent on the Chemical Update + Timer.Cooling_GPU.Start(); #endif - Timer.Hydro.End(); - #endif //CPU_TIME - - #ifdef COOLING_GRACKLE + // ==Apply Cooling from cooling/cooling_cuda.h== + Cooling_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dt, gama); #ifdef CPU_TIME - Timer.Cooling.Start(); + Timer.Cooling_GPU.End(); #endif - Do_Cooling_Step_Grackle( ); + +#endif // COOLING_GPU + +#ifdef DUST + // ==Apply dust from dust/dust_cuda.h== + Dust_Update(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, 
H.dt, gama, H.grain_radius); +#endif // DUST + +#ifdef CHEMISTRY_GPU + // Update the H and He ionization fractions and apply cooling and photoheating + Update_Chemistry(); #ifdef CPU_TIME - Timer.Cooling.End(); + Timer.Chemistry.RecordTime(Chem.H.runtime_chemistry_step); + non_hydro_elapsed_time += Chem.H.runtime_chemistry_step; + #endif + C.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + C.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + C.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + C.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + C.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + C.e_density = &C.host[H.n_cells * grid_enum::e_density]; +#endif + +#ifdef COOLING_GRACKLE + Cool.fields.density = C.density; + Cool.fields.HI_density = &C.host[H.n_cells * grid_enum::HI_density]; + Cool.fields.HII_density = &C.host[H.n_cells * grid_enum::HII_density]; + Cool.fields.HeI_density = &C.host[H.n_cells * grid_enum::HeI_density]; + Cool.fields.HeII_density = &C.host[H.n_cells * grid_enum::HeII_density]; + Cool.fields.HeIII_density = &C.host[H.n_cells * grid_enum::HeIII_density]; + Cool.fields.e_density = &C.host[H.n_cells * grid_enum::e_density]; + + #ifdef GRACKLE_METALS + Cool.fields.metal_density = &C.host[H.n_cells * grid_enum::metal_density]; #endif - #endif//COOLING_GRACKLE + #ifdef CPU_TIME + double cur_grackle_timing = Get_Time(); + #endif // CPU_TIME + Do_Cooling_Step_Grackle(); + #ifdef CPU_TIME + double cur_grackle_timing = Get_Time() - cur_grackle_timing; + Timer.Cooling_Grackle.RecordTime(cur_grackle_timing); + non_hydro_elapsed_time += cur_grackle_timing; + #endif // CPU_TIME +#endif // COOLING_GRACKLE + + // == average slow cells and compute the new timestep == +#ifdef AVERAGE_SLOW_CELLS + // Set the min_delta_t for averaging a slow cell + Real max_dti_slow; + max_dti_slow = 1 / H.min_dt_slow; + Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, 
max_dti_slow); +#endif // AVERAGE_SLOW_CELLS + + // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== + Real dti = Calc_Inverse_Timestep(); + +#ifdef CPU_TIME + Timer.Hydro.Subtract(non_hydro_elapsed_time); + Timer.Hydro.End(); +#endif // CPU_TIME return dti; } -void Grid3D::Update_Time(){ - +void Grid3D::Update_Time() +{ // update the time H.t += H.dt; - #ifdef PARTICLES +#ifdef PARTICLES Particles.t = H.t; #ifdef COSMOLOGY Cosmo.current_a += Cosmo.delta_a; - Cosmo.current_z = 1./Cosmo.current_a - 1; + Cosmo.current_z = 1. / Cosmo.current_a - 1; Particles.current_a = Cosmo.current_a; Particles.current_z = Cosmo.current_z; - Grav.current_a = Cosmo.current_a; - #endif //COSMOLOGY - #endif //PARTICLES + Grav.current_a = Cosmo.current_a; + #endif // COSMOLOGY +#endif // PARTICLES - #if defined(ANALYSIS) && defined(COSMOLOGY) +#if defined(ANALYSIS) && defined(COSMOLOGY) Analysis.current_z = Cosmo.current_z; - #endif - - - - +#endif } /*! \fn void Reset(void) @@ -595,65 +615,70 @@ void Grid3D::Reset(void) // reset the initialization flag flag_init = 0; - } - /*! \fn void FreeMemory(void) * \brief Free the memory allocated by the Grid3D class. */ void Grid3D::FreeMemory(void) { // free the conserved variable arrays - CudaSafeCall( cudaFreeHost(C.host) ); + GPU_Error_Check(cudaFreeHost(C.host)); - // free the timestep arrays - CudaSafeCall( cudaFreeHost(host_dti_array) ); - cudaFree(dev_dti_array); - cudaFree(dev_dti); +#ifdef GRAVITY + GPU_Error_Check(cudaFreeHost(C.Grav_potential)); + GPU_Error_Check(cudaFree(C.d_Grav_potential)); +#endif - #ifdef GRAVITY - CudaSafeCall( cudaFreeHost(C.Grav_potential) ); - CudaSafeCall( cudaFree(C.d_Grav_potential) ); - #endif +// If memory is single allocated, free the memory at the end of the simulation. 
+#ifdef VL + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + Free_Memory_VL_1D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + Free_Memory_VL_2D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + Free_Memory_VL_3D(); + } +#endif // VL +#ifdef SIMPLE + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { + Free_Memory_Simple_1D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { + Free_Memory_Simple_2D(); + } + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + Free_Memory_Simple_3D(); + } +#endif // SIMPLE - // If memory is single allocated, free the memory at the end of the simulation. - #ifdef VL - if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_VL_1D(); - if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_VL_2D(); - if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_VL_3D(); - #endif // VL - #ifdef SIMPLE - if (H.nx > 1 && H.ny == 1 && H.nz == 1) Free_Memory_Simple_1D(); - if (H.nx > 1 && H.ny > 1 && H.nz == 1) Free_Memory_Simple_2D(); - if (H.nx > 1 && H.ny > 1 && H.nz > 1) Free_Memory_Simple_3D(); - #endif // SIMPLE - - #ifdef GRAVITY +#ifdef GRAVITY Grav.FreeMemory_CPU(); #ifdef GRAVITY_GPU Grav.FreeMemory_GPU(); #endif - #endif +#endif - #ifdef PARTICLES +#ifdef PARTICLES Particles.Reset(); - #endif +#endif - #ifdef COOLING_GRACKLE +#ifdef COOLING_GRACKLE Cool.Free_Memory(); - #endif +#endif - #ifdef COOLING_GPU +#ifdef COOLING_GPU #ifdef CLOUDY_COOL Free_Cuda_Textures(); #endif - #endif +#endif - #ifdef CHEMISTRY_GPU +#ifdef CHEMISTRY_GPU Chem.Reset(); - #endif +#endif - #ifdef ANALYSIS +#ifdef ANALYSIS Analysis.Reset(); - #endif +#endif } diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index ec48c27be..e248f6490 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -4,105 +4,104 @@ #ifndef GRID3D_H #define GRID3D_H -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" +#ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" #endif /*MPI_CHOLLA*/ #include + #include "../global/global.h" #include "../global/global_cuda.h" #ifdef HDF5 -#include + #include #endif #ifdef 
GRAVITY -#include "../gravity/grav3D.h" + #include "../gravity/grav3D.h" #endif #ifdef PARTICLES -#include "../particles/particles_3D.h" + #include "../particles/particles_3D.h" #endif #include "../model/disk_galaxy.h" #ifdef COSMOLOGY -#include "../cosmology/cosmology.h" + #include "../cosmology/cosmology.h" #endif #ifdef COOLING_GRACKLE -#include "../cooling_grackle/cool_grackle.h" + #include "../cooling_grackle/cool_grackle.h" #endif #ifdef CPU_TIME -#include "../utils/timing_functions.h" + #include "../utils/timing_functions.h" #endif #ifdef CHEMISTRY_GPU -#include "chemistry_gpu/chemistry_gpu.h" + #include "chemistry_gpu/chemistry_gpu.h" #endif #ifdef ANALYSIS -#include "../analysis/analysis.h" + #include "../analysis/analysis.h" #endif - -struct Rotation -{ +struct Rotation { /*! \var nx - * \brief Number of pixels in x-dir of rotated, projected image*/ + * \brief Number of pixels in x-dir of rotated, projected image*/ int nx; /*! \var nz - * \brief Number of pixels in z-dir of rotated, projected image*/ + * \brief Number of pixels in z-dir of rotated, projected image*/ int nz; /*! \var nx_min - * \brief Left most point in the projected image for this subvolume*/ + * \brief Left most point in the projected image for this subvolume*/ int nx_min; /*! \var nx_max - * \brief Right most point in the projected image for this subvolume*/ + * \brief Right most point in the projected image for this subvolume*/ int nx_max; /*! \var nz_min - * \brief Bottom most point in the projected image for this subvolume*/ + * \brief Bottom most point in the projected image for this subvolume*/ int nz_min; /*! \var nz_max - * \brief Top most point in the projected image for this subvolume*/ + * \brief Top most point in the projected image for this subvolume*/ int nz_max; /*! \var delta - * \brief Rotation angle about z axis in simulation frame*/ + * \brief Rotation angle about z axis in simulation frame*/ Real delta; /*! 
\var theta - * \brief Rotation angle about x axis in simulation frame*/ + * \brief Rotation angle about x axis in simulation frame*/ Real theta; /*! \var phi - * \brief Rotation angle about y axis in simulation frame*/ + * \brief Rotation angle about y axis in simulation frame*/ Real phi; /*! \var Lx - * \brief Physical x-dir size of projected image*/ + * \brief Physical x-dir size of projected image*/ Real Lx; /*! \var Lz - * \brief Physical z-dir size of projected image*/ + * \brief Physical z-dir size of projected image*/ Real Lz; /*! \var i_delta - * \brief number of output projection for delta rotation*/ + * \brief number of output projection for delta rotation*/ int i_delta; /*! \var n_delta - * \brief total number of output projection for delta rotation*/ + * \brief total number of output projection for delta rotation*/ Real n_delta; /*! \var ddelta_dt - * \brief rate of delta rotation*/ + * \brief rate of delta rotation*/ Real ddelta_dt; /*! \var flag_delta @@ -110,38 +109,37 @@ struct Rotation int flag_delta; }; -struct Header -{ +struct Header { /*! \var n_cells - * \brief Total number of cells in the grid (including ghost cells) */ + * \brief Total number of cells in the grid (including ghost cells) */ int n_cells; /*! \var n_ghost - * \brief Number of ghost cells on each side of the grid */ + * \brief Number of ghost cells on each side of the grid */ int n_ghost; /*! \var nx - * \brief Total number of cells in the x-dimension */ + * \brief Total number of cells in the x-dimension */ int nx; /*! \var ny - * \brief Total number of cells in the y-dimension */ + * \brief Total number of cells in the y-dimension */ int ny; /*! \var nz - * \brief Total number of cells in the z-dimension */ + * \brief Total number of cells in the z-dimension */ int nz; /*! \var nx_real - * \brief Number of real cells in the x-dimension */ + * \brief Number of real cells in the x-dimension */ int nx_real; /*! 
\var ny - * \brief Number of real cells in the y-dimension */ + * \brief Number of real cells in the y-dimension */ int ny_real; /*! \var nz - * \brief Number of real cells in the z-dimension */ + * \brief Number of real cells in the z-dimension */ int nz_real; /*! \var xbound */ @@ -156,7 +154,7 @@ struct Header /* \brief Global domain z-direction minimum */ Real zbound; - /*! \var xblocal */ + /*! \var xblocal */ /* \brief Local domain x-direction minimum */ Real xblocal; @@ -193,51 +191,56 @@ struct Header Real zdglobal; /*! \var dx - * \brief x-width of cells */ + * \brief x-width of cells */ Real dx; /*! \var dy - * \brief y-width of cells */ + * \brief y-width of cells */ Real dy; /*! \var dz - * \brief z-width of cells */ + * \brief z-width of cells */ Real dz; /*! \var t - * \brief Simulation time */ + * \brief Simulation time */ Real t; /*! \var dt - * \brief Length of the current timestep */ + * \brief Length of the current timestep */ Real dt; - #ifdef AVERAGE_SLOW_CELLS +#ifdef AVERAGE_SLOW_CELLS Real min_dt_slow; - #endif +#endif /*! \var t_wall - * \brief Wall time */ + * \brief Wall time */ Real t_wall; /*! \var n_step - * \brief Number of timesteps taken */ + * \brief Number of timesteps taken */ int n_step; /*! \var n_fields - * \brief Number of fields (conserved variables, scalars, etc.) */ + * \brief Number of fields (conserved variables, scalars, etc.) */ int n_fields; + /*! 
\var custom_grav + * \brief Flag to set specific static gravity field */ + int custom_grav; + // Values for lower limit for density and temperature - Real density_floor; Real temperature_floor; + Real density_floor; + Real scalar_floor; Real Ekin_avrg; - //Flag to indicate when to transfer the Conserved boundaries + // Flag to indicate when to transfer the Conserved boundaries bool TRANSFER_HYDRO_BOUNDARIES; - //Parameters For Spherical Colapse Problem + // Parameters For Spherical Colapse Problem Real sphere_density; Real sphere_radius; Real sphere_background_density; @@ -245,607 +248,677 @@ struct Header Real sphere_center_y; Real sphere_center_z; - - #ifdef GRAVITY +#ifdef GRAVITY /*! \var n_ghost_potential_offset - * \brief Number of offset betewen hydro_ghost_cells and potential_ghost_cells */ + * \brief Number of offset betewen hydro_ghost_cells and + * potential_ghost_cells */ int n_ghost_potential_offset; - #endif +#endif - #ifdef COSMOLOGY +#ifdef COSMOLOGY bool OUTPUT_SCALE_FACOR; - #endif +#endif /*! \var Output_Now - * \brief Flag set to true when data has to be written to file */ + * \brief Flag set to true when data has to be written to file */ bool Output_Now; bool Output_Initial; /*! \var Output_Complete_Data - * \brief Flag set to true when all the data will be written to file (Restart File ) */ + * \brief Flag set to true when all the data will be written to file + * (Restart File ) */ bool Output_Complete_Data; - +#ifdef SCALAR + #ifdef DUST + Real grain_radius; + #endif +#endif }; /*! \class Grid3D * \brief Class to create a 3D grid of cells. */ class Grid3D { - public: + public: + /*! \var flag_init + * \brief Initialization flag */ + int flag_init; - /*! \var flag_init - * \brief Initialization flag */ - int flag_init; + /*! \var struct Header H + * \brief Header for the grid */ + struct Header H; - /*! \var struct Header H - * \brief Header for the grid */ - struct Header H; + /*! 
\var struct Rotation R + * \brief Rotation struct for data projections */ + struct Rotation R; - /*! \var struct Rotation R - * \brief Rotation struct for data projections */ - struct Rotation R; +#ifdef GRAVITY + // Object that contains data for gravity + Grav3D Grav; +#endif - #ifdef GRAVITY - // Object that contains data for gravity - Grav3D Grav; - #endif +#ifdef PARTICLES + // Object that contains data for particles + Particles3D Particles; +#endif - #ifdef PARTICLES - // Object that contains data for particles - Particles_3D Particles; - #endif +#ifdef COSMOLOGY + // Object that contains data for cosmology + Cosmology Cosmo; +#endif - #ifdef COSMOLOGY - // Object that contains data for cosmology - Cosmology Cosmo; - #endif +#ifdef COOLING_GRACKLE + // Object that contains data for Grackle cooling + Cool_GK Cool; +#endif - #ifdef COOLING_GRACKLE - // Object that contains data for Grackle cooling - Cool_GK Cool; - #endif +#ifdef CPU_TIME + Time Timer; +#endif - #ifdef CPU_TIME - Time Timer; - #endif +#ifdef CHEMISTRY_GPU + // Object that contains data for the GPU chemistry solver + Chem_GPU Chem; +#endif - #ifdef CHEMISTRY_GPU - // Object that contains data for the GPU chemistry solver - Chem_GPU Chem; - #endif +#ifdef ANALYSIS + AnalysisModule Analysis; +#endif - #ifdef ANALYSIS - Analysis_Module Analysis; - #endif +#ifdef SUPERNOVA // TODO refactor this into Analysis module + Real countSN; + Real countResolved; + Real countUnresolved; + Real totalEnergy; + Real totalMomentum; + Real totalUnresEnergy; +#endif + struct Conserved { + /*! pointer to conserved variable array on the host */ + Real *host; + + /*! \var density + * \brief Array containing the density of each cell in the grid */ + Real *density; + + /*! \var momentum_x + * \brief Array containing the momentum in the x direction of each cell in + * the grid */ + Real *momentum_x; + + /*! 
\var momentum_y + * \brief Array containing the momentum in the y direction of each cell in + * the grid */ + Real *momentum_y; + + /*! \var momentum_z + * \brief Array containing the momentum in the z direction of each cell in + * the grid */ + Real *momentum_z; + + /*! \var Energy + * \brief Array containing the total Energy of each cell in the grid */ + Real *Energy; + +#ifdef SCALAR + /*! \var scalar + * \brief Array containing the values of passive scalar variable(s). */ + Real *scalar; + #ifdef BASIC_SCALAR + /*! \var basic_scalar + * \brief Array containing the values of a basic passive scalar variable. + */ + Real *basic_scalar; + #endif + #ifdef DUST + /*! \var dust_density + * \brief Array containing the dust densities. + */ + Real *dust_density; + #endif +#endif // SCALAR + +#ifdef MHD + /*! \var magnetic_x \brief Array containing the magnetic field in the x + * direction of each cell in the grid. Note that this is the magnetic + * field at the x+1/2 face of the cell since constrained transport + * requires face centered, not cell centered, magnetic fields */ + Real *magnetic_x; + + /*! \var magnetic_y \brief Array containing the magnetic field in the y + * direction of each cell in the grid. Note that this is the magnetic + * field at the y+1/2 face of the cell since constrained transport + * requires face centered, not cell centered, magnetic fields */ + Real *magnetic_y; + + /*! \var magnetic_z \brief Array containing the magnetic field in the z + * direction of each cell in the grid. Note that this is the magnetic + * field at the z+1/2 face of the cell since constrained transport + * requires face centered, not cell centered, magnetic fields */ + Real *magnetic_z; +#endif // MHD + +#ifdef DE + /*! \var GasEnergy + * \brief Array containing the internal energy of each cell, only tracked + separately when using the dual-energy formalism. */ + Real *GasEnergy; +#endif // DE + + /*! 
\var grav_potential + * \brief Array containing the gravitational potential of each cell, only + * tracked separately when using GRAVITY. */ + Real *Grav_potential; - struct Conserved - { - /*! pointer to conserved variable array on the host */ - Real *host; - - /*! \var density - * \brief Array containing the density of each cell in the grid */ - Real *density; - - /*! \var momentum_x - * \brief Array containing the momentum in the x direction of each cell in the grid */ - Real *momentum_x; - - /*! \var momentum_y - * \brief Array containing the momentum in the y direction of each cell in the grid */ - Real *momentum_y; - - /*! \var momentum_z - * \brief Array containing the momentum in the z direction of each cell in the grid */ - Real *momentum_z; - - /*! \var Energy - * \brief Array containing the total Energy of each cell in the grid */ - Real *Energy; - - #ifdef SCALAR - /*! \var scalar - * \brief Array containing the values of the passive scalar variable(s). */ - Real *scalar; - #endif // SCALAR - - #ifdef MHD - /*! \var magnetic_x \brief Array containing the magnetic field in the x - * direction of each cell in the grid. Note that this is the magnetic - * field at the x+1/2 face of the cell since constrained transport - * requires face centered, not cell centered, magnetic fields */ - Real *magnetic_x; - - /*! \var magnetic_y \brief Array containing the magnetic field in the y - * direction of each cell in the grid. Note that this is the magnetic - * field at the y+1/2 face of the cell since constrained transport - * requires face centered, not cell centered, magnetic fields */ - Real *magnetic_y; - - /*! \var magnetic_z \brief Array containing the magnetic field in the z - * direction of each cell in the grid. Note that this is the magnetic - * field at the z+1/2 face of the cell since constrained transport - * requires face centered, not cell centered, magnetic fields */ - Real *magnetic_z; - #endif // MHD - - #ifdef DE - /*! 
\var GasEnergy - * \brief Array containing the internal energy of each cell, only tracked separately when using - the dual-energy formalism. */ - Real *GasEnergy; - #endif // DE - - /*! \var grav_potential - * \brief Array containing the gravitational potential of each cell, only tracked separately when using GRAVITY. */ - Real *Grav_potential; - - #ifdef CHEMISTRY_GPU - Real *HI_density; - Real *HII_density; - Real *HeI_density; - Real *HeII_density; - Real *HeIII_density; - Real *e_density; - #endif - - - /*! pointer to conserved variable on device */ - Real *device; - Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, - *d_Energy, *d_scalar, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, - *d_GasEnergy; - - /*! pointer to gravitational potential on device */ - Real *d_Grav_potential; - } C; - - - /*! \fn Grid3D(void) - * \brief Constructor for the grid */ - Grid3D(void); - - /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) - * \brief Initialize the grid. */ - void Initialize(struct parameters *P); - - /*! \fn void AllocateMemory(void) - * \brief Allocate memory for the d, m, E arrays. */ - void AllocateMemory(void); - - /*! \fn void Set_Initial_Conditions(parameters P) - * \brief Set the initial conditions based on info in the parameters structure. */ - void Set_Initial_Conditions(parameters P); - - /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos) - * \brief Get the cell-centered position based on cell index */ - void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos); - - /*! \fn void Set_Domain_Properties(struct parameters P) - * \brief Set local domain properties */ - void Set_Domain_Properties(struct parameters P); - - /*! \fn void set_dt(Real dti) - * \brief Calculate the timestep. */ - void set_dt(Real dti); - - #ifdef GRAVITY - /*! \fn void set_dt(Real dti) - * \brief Calculate the timestep for Gravity. 
*/ - void set_dt_Gravity(); - #endif +#ifdef CHEMISTRY_GPU + Real *HI_density; + Real *HII_density; + Real *HeI_density; + Real *HeII_density; + Real *HeIII_density; + Real *e_density; +#endif - /*! \fn Real calc_dti_CPU_1D() - * \brief Calculate the maximum inverse timestep on 1D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_1D(); + /*! pointer to conserved variable on device */ + Real *device; + Real *d_density, *d_momentum_x, *d_momentum_y, *d_momentum_z, *d_Energy, *d_scalar, *d_basic_scalar, + *d_dust_density, *d_magnetic_x, *d_magnetic_y, *d_magnetic_z, *d_GasEnergy; - /*! \fn Real calc_dti_CPU_2D() - * \brief Calculate the maximum inverse timestep on 2D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_2D(); + /*! pointer to gravitational potential on device */ + Real *d_Grav_potential; + } C; - /*! \fn Real calc_dti_CPU_3D_function() - * \brief Calculate the maximum inverse timestep on 3D using openMP, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_3D_function( int g_start, int g_end ); + /*! \fn Grid3D(void) + * \brief Constructor for the grid */ + Grid3D(void); - /*! \fn Real calc_dti_CPU_3D() - * \brief Calculate the maximum inverse timestep on 3D, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU_3D(); + /*! \fn void Initialize(int nx_in, int ny_in, int nz_in) + * \brief Initialize the grid. */ + void Initialize(struct Parameters *P); - /*! \fn Real calc_dti_CPU() - * \brief Calculate the maximum inverse timestep, according to the CFL condition (Toro 6.17). */ - Real calc_dti_CPU(); + /*! \fn void AllocateMemory(void) + * \brief Allocate memory for the d, m, E arrays. */ + void AllocateMemory(void); - /*! \fn void Update_Grid(void) - * \brief Update the conserved quantities in each cell. */ - Real Update_Grid(void); + /*! \fn void Set_Initial_Conditions(Parameters P ) + * \brief Set the initial conditions based on info in the parameters + * structure. 
*/ + void Set_Initial_Conditions(Parameters P); - /*! \fn void Update_Hydro_Grid(void) - * \brief Do all steps to update the hydro. */ - Real Update_Hydro_Grid(void); + /*! \fn void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real + * *zpos) \brief Get the cell-centered position based on cell index */ + void Get_Position(long i, long j, long k, Real *xpos, Real *ypos, Real *zpos); - void Update_Time(); + Real Calc_Inverse_Timestep(); - /*! \fn void Write_Header_Text(FILE *fp) - * \brief Write the relevant header info to a text output file. */ - void Write_Header_Text(FILE *fp); + /*! \fn void Set_Domain_Properties(struct Parameters P) + * \brief Set local domain properties */ + void Set_Domain_Properties(struct Parameters P); - /*! \fn void Write_Grid_Text(FILE *fp) - * \brief Write the grid to a file, at the current simulation time. */ - void Write_Grid_Text(FILE *fp); + /*! \fn void set_dt(Real dti) + * \brief Calculate the timestep. */ + void set_dt(Real dti); - /*! \fn void Write_Header_Binary(FILE *fp) - * \brief Write the relevant header info to a binary output file. */ - void Write_Header_Binary(FILE *fp); +#ifdef GRAVITY + /*! \fn void set_dt_Gravity() + * \brief Calculate the timestep for Gravity. */ + void set_dt_Gravity(); +#endif - /*! \fn void Write_Grid_Binary(FILE *fp) - * \brief Write the grid to a file, at the current simulation time. */ - void Write_Grid_Binary(FILE *fp); + /*! \fn void Execute_Hydro_Integrator(void) + * \brief Updates cells by executing the hydro integrator. */ + void Execute_Hydro_Integrator(void); -#ifdef HDF5 - /*! \fn void Write_Header_HDF5(hid_t file_id) - * \brief Write the relevant header info to the HDF5 file. */ - void Write_Header_HDF5(hid_t file_id); + /*! \fn Real Update_Hydro_Grid(void) + * \brief Do all steps to update the hydro. */ + Real Update_Hydro_Grid(void); - /*! \fn void Write_Grid_HDF5(hid_t file_id) - * \brief Write the grid to a file, at the current simulation time.
*/ - void Write_Grid_HDF5(hid_t file_id); + void Update_Time(); + /*! \fn void Write_Header_Text(FILE *fp) + * \brief Write the relevant header info to a text output file. */ + void Write_Header_Text(FILE *fp); - /*! \fn void Write_Projection_HDF5(hid_t file_id) - * \brief Write projected density and temperature data to a file. */ - void Write_Projection_HDF5(hid_t file_id); + /*! \fn void Write_Grid_Text(FILE *fp) + * \brief Write the grid to a file, at the current simulation time. */ + void Write_Grid_Text(FILE *fp); - /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id) - * \brief Write the relevant header info to the HDF5 file for rotated projection. */ - void Write_Header_Rotated_HDF5(hid_t file_id); + /*! \fn void Write_Header_Binary(FILE *fp) + * \brief Write the relevant header info to a binary output file. */ + void Write_Header_Binary(FILE *fp); - /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id) - * \brief Write rotated projected data to a file, at the current simulation time. */ - void Write_Rotated_Projection_HDF5(hid_t file_id); + /*! \fn void Write_Grid_Binary(FILE *fp) + * \brief Write the grid to a file, at the current simulation time. */ + void Write_Grid_Binary(FILE *fp); - /*! \fn void Write_Slices_HDF5(hid_t file_id) - * \brief Write xy, xz, and yz slices of all data to a file. */ - void Write_Slices_HDF5(hid_t file_id); +#ifdef HDF5 + /*! \fn void Write_Header_HDF5(hid_t file_id) + * \brief Write the relevant header info to the HDF5 file. */ + void Write_Header_HDF5(hid_t file_id); -#endif + /*! \fn void Write_Grid_HDF5(hid_t file_id) + * \brief Write the grid to a file, at the current simulation time. */ + void Write_Grid_HDF5(hid_t file_id); - /*! \fn void Read_Grid(struct parameters P) - * \brief Read in grid data from an output file. */ - void Read_Grid(struct parameters P); + /*! \fn void Write_Projection_HDF5(hid_t file_id) + * \brief Write projected density and temperature data to a file. 
*/ + void Write_Projection_HDF5(hid_t file_id); - /*! \fn Read_Grid_Binary(FILE *fp) - * \brief Read in grid data from a binary file. */ - void Read_Grid_Binary(FILE *fp); + /*! \fn void Write_Header_Rotated_HDF5(hid_t file_id) + * \brief Write the relevant header info to the HDF5 file for rotated + * projection. */ + void Write_Header_Rotated_HDF5(hid_t file_id); -#ifdef HDF5 - /*! \fn void Read_Grid_HDF5(hid_t file_id) - * \brief Read in grid data from an hdf5 file. */ - void Read_Grid_HDF5(hid_t file_id, struct parameters P); -#endif + /*! \fn void Write_Rotated_Projection_HDF5(hid_t file_id) + * \brief Write rotated projected data to a file, at the current simulation + * time. */ + void Write_Rotated_Projection_HDF5(hid_t file_id); - /*! \fn void Reset(void) - * \brief Reset the Grid3D class. */ - void Reset(void); - - /*! \fn void FreeMemory(void) - * \brief Free the memory for the density array. */ - void FreeMemory(void); - - /*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P) - * \brief Constant gas properties. */ - void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz); - - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Sine wave perturbation. */ - void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); - - /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */ - void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A); + /*! \fn void Write_Slices_HDF5(hid_t file_id) + * \brief Write xy, xz, and yz slices of all data to a file. */ + void Write_Slices_HDF5(hid_t file_id); - /*! 
\fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph) - * \brief Initialize the grid with a Riemann problem. */ - void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph); +#endif - /*! \fn void Shu_Osher() - * \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone 2008, Section 8.1 */ - void Shu_Osher(); + /*! \fn void Read_Grid(struct Parameters P) + * \brief Read in grid data from 1-per-process output files. */ + void Read_Grid(struct Parameters P); - /*! \fn void Blast_1D() - * \brief Initialize the grid with two interacting blast waves. See Stone 2008, Section 8.1.*/ - void Blast_1D(); + /*! \fn void Read_Grid_Cat(struct Parameters P) + * \brief Read in grid data from a single concatenated output file. */ + void Read_Grid_Cat(struct Parameters P); - /*! \fn void KH() - * \brief Initialize the grid with a Kelvin-Helmholtz instability with a discontinuous interface. */ - void KH(); - - /*! \fn void KH_res_ind() - * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes are resolution independent. */ - void KH_res_ind(); - - /*! \fn void Rayleigh_Taylor() - * \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */ - void Rayleigh_Taylor(); - - /*! \fn void Gresho() - * \brief Initialize the grid with the 2D Gresho problem described in LW03. */ - void Gresho(); - - /*! \fn void Implosion_2D() - * \brief Implosion test described in Liska, 2003. */ - void Implosion_2D(); - - /*! \fn void Explosion_2D() - * \brief Explosion test described in Liska, 2003. */ - void Explosion_2D(); - - /*! \fn void Noh_2D() - * \brief Noh test described in Liska, 2003. */ - void Noh_2D(); - - /*! 
\fn void Noh_3D() - * \brief Noh test described in Stone, 2008. */ - void Noh_3D(); - - /*! \fn void Disk_2D() - * \brief Initialize the grid with a 2D disk following a Kuzmin profile. */ - void Disk_2D(); - - /*! \fn void Disk_3D(parameters P) - * \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai profile. */ - void Disk_3D(parameters P); - - /*! \fn void Set_Boundary_Conditions(parameters P) - * \brief Set the boundary conditions based on info in the parameters structure. */ - void Set_Boundary_Conditions(parameters P); - - /*! \fn void Set_Boundary_Conditions_Grid(parameters P) - * \brief Set the boundary conditions for all components based on info in the parameters structure. */ - void Set_Boundary_Conditions_Grid( parameters P); - - /*! \fn int Check_Custom_Boundary(int *flags, struct parameters P) - * \brief Check for custom boundary conditions */ - int Check_Custom_Boundary(int *flags, struct parameters P); - - /*! \fn void Set_Boundaries(int dir, int flags[]) - * \brief Apply boundary conditions to the grid. */ - void Set_Boundaries(int dir, int flags[]); - - /*! \fn Set_Boundary_Extents(int dir, int *imin, int *imax) - * \brief Set the extents of the ghost region we are initializing. */ - void Set_Boundary_Extents(int dir, int *imin, int *imax); - - /*! \fn void Custom_Boundary(char bcnd[MAXLEN]) - * \brief Select appropriate custom boundary function. */ - void Custom_Boundary(char bcnd[MAXLEN]); - - /*! \fn void Noh_Boundary() - * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, - as per the Noh problem in Liska, 2003, or in Stone, 2008. */ - void Noh_Boundary(); - - /*! \fn void Spherical_Overpressure_3D() - * \brief Initialize the grid with a 3D spherical overdensity and overpressue. */ - void Spherical_Overpressure_3D(); - - /*! 
\fn void Spherical_Overpressure_3D() - * \brief Initialize the grid with a 3D spherical overdensity for gravitational collapse */ - void Spherical_Overdensity_3D(); - - void Clouds(); - - void Uniform_Grid(); - - void Zeldovich_Pancake( struct parameters P ); - - void Chemistry_Test( struct parameters P ); - - -#ifdef MPI_CHOLLA - void Set_Boundaries_MPI(struct parameters P); - void Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P); - void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags); - void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags); - void Unload_MPI_Comm_Buffers(int index); - - int Load_Hydro_DeviceBuffer_X0(Real *buffer); - int Load_Hydro_DeviceBuffer_X1(Real *buffer); - int Load_Hydro_DeviceBuffer_Y0(Real *buffer); - int Load_Hydro_DeviceBuffer_Y1(Real *buffer); - int Load_Hydro_DeviceBuffer_Z0(Real *buffer); - int Load_Hydro_DeviceBuffer_Z1(Real *buffer); + /*! \fn Read_Grid_Binary(FILE *fp) + * \brief Read in grid data from a binary file. */ + void Read_Grid_Binary(FILE *fp); - void Unload_Hydro_DeviceBuffer_X0(Real *buffer); - void Unload_Hydro_DeviceBuffer_X1(Real *buffer); - void Unload_Hydro_DeviceBuffer_Y0(Real *buffer); - void Unload_Hydro_DeviceBuffer_Y1(Real *buffer); - void Unload_Hydro_DeviceBuffer_Z0(Real *buffer); - void Unload_Hydro_DeviceBuffer_Z1(Real *buffer); +#ifdef HDF5 + /*! \fn void Read_Grid_HDF5(hid_t file_id) + * \brief Read in grid data from an hdf5 file. */ + void Read_Grid_HDF5(hid_t file_id, struct Parameters P); +#endif + + /*! \fn void Reset(void) + * \brief Reset the Grid3D class. */ + void Reset(void); + + /*! \fn void FreeMemory(void) + * \brief Free the memory for the density array. */ + void FreeMemory(void); + + /*! + * \brief Constant gas properties. + * + * \param[in] P the parameters struct. + */ + void Constant(Parameters const &P); + + /*! + * \brief Sine wave perturbation. + * + * \param[in] P the parameters struct. + */ + void Sound_Wave(Parameters const &P); + + /*! 
+ * \brief Initialize the grid with a simple linear wave. + * + * \param[in] P the parameters struct. + */ + void Linear_Wave(Parameters const &P); + + /*! + * \brief Square wave density perturbation with amplitude A*rho in pressure + * equilibrium. + * + * \param[in] P the parameters struct. + */ + void Square_Wave(Parameters const &P); + + /*! + * \brief Initialize the grid with a Riemann problem. + * + * \param[in] P the parameters struct. + */ + void Riemann(Parameters const &P); + + /*! \fn void Shu_Osher() + * \brief Initialize the grid with the Shu-Osher shock tube problem. See + * Stone 2008, Section 8.1 */ + void Shu_Osher(); + + /*! \fn void Blast_1D() + * \brief Initialize the grid with two interacting blast waves. See Stone + * 2008, Section 8.1.*/ + void Blast_1D(); + + /*! \fn void KH() + * \brief Initialize the grid with a Kelvin-Helmholtz instability with a + * discontinuous interface. */ + void KH(); + + /*! \fn void KH_res_ind() + * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes + * are resolution independent. */ + void KH_res_ind(); + + /*! \fn void Rayleigh_Taylor() + * \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */ + void Rayleigh_Taylor(); + + /*! \fn void Gresho() + * \brief Initialize the grid with the 2D Gresho problem described in LW03. + */ + void Gresho(); + + /*! \fn void Implosion_2D() + * \brief Implosion test described in Liska, 2003. */ + void Implosion_2D(); + + /*! \fn void Explosion_2D() + * \brief Explosion test described in Liska, 2003. */ + void Explosion_2D(); + + /*! \fn void Noh_2D() + * \brief Noh test described in Liska, 2003. */ + void Noh_2D(); + + /*! \fn void Noh_3D() + * \brief Noh test described in Stone, 2008. */ + void Noh_3D(); + + /*! \fn void Disk_2D() + * \brief Initialize the grid with a 2D disk following a Kuzmin profile. */ + void Disk_2D(); + + /*! 
\fn void Disk_3D(Parameters P) + * \brief Initialize the grid with a 3D disk following a Miyamoto-Nagai + * profile. */ + void Disk_3D(Parameters P); + + /*! \fn void Set_Boundary_Conditions(Parameters P) + * \brief Set the boundary conditions based on info in the parameters + * structure. */ + void Set_Boundary_Conditions(Parameters P); + + /*! \fn void Set_Boundary_Conditions_Grid(Parameters P) + * \brief Set the boundary conditions for all components based on info in the + * parameters structure. */ + void Set_Boundary_Conditions_Grid(Parameters P); + + /*! \fn int Check_Custom_Boundary(int *flags, struct Parameters P) + * \brief Check for custom boundary conditions */ + int Check_Custom_Boundary(int *flags, struct Parameters P); + + /*! \fn void Set_Boundaries(int dir, int flags[]) + * \brief Apply boundary conditions to the grid. */ + void Set_Boundaries(int dir, int flags[]); + + /*! \fn void Set_Boundary_Extents(int dir, int *imin, int *imax) + * \brief Set the extents of the ghost region we are initializing. */ + void Set_Boundary_Extents(int dir, int *imin, int *imax); + + /*! \fn void Custom_Boundary(char bcnd[MAXLEN]) + * \brief Select appropriate custom boundary function. */ + void Custom_Boundary(char bcnd[MAXLEN]); + + /*! \fn void Wind_Boundary() + * \brief Apply a constant wind to the -x boundary. */ + void Wind_Boundary(); + + /*! \fn void Noh_Boundary() + * \brief Apply analytic boundary conditions to +x, +y (and +z) faces, + as per the Noh problem in Liska, 2003, or in Stone, 2008. */ + void Noh_Boundary(); + + /*! \fn void Spherical_Overpressure_3D() + * \brief Initialize the grid with a 3D spherical overdensity and + * overpressure. */ + void Spherical_Overpressure_3D(); + + /*!
\fn void Spherical_Overdensity_3D() + * \brief Initialize the grid with a 3D spherical overdensity for + * gravitational collapse */ + void Spherical_Overdensity_3D(); + + void Clouds(); + + void Uniform_Grid(); + + void Zeldovich_Pancake(struct Parameters P); + + void Chemistry_Test(struct Parameters P); + +#ifdef MHD + /*! + * \brief Initialize the grid with a circularly polarized Alfven wave. Only options are angle and Vx. See [Gardiner & + * Stone 2008](https://arxiv.org/abs/0712.2634) pages 4134-4135 for details. + * + * \param P The parameters. Only uses Vx, pitch, and yaw + */ + void Circularly_Polarized_Alfven_Wave(struct Parameters const P); + + /*! + * \brief Initialize the grid with an advecting field loop. See [Gardiner & + * Stone 2008](https://arxiv.org/abs/0712.2634). + * + * \param P The parameters object + */ + void Advecting_Field_Loop(struct Parameters const P); + + /*! + * \brief Initialize the grid with a spherical MHD blast wave. See [Gardiner & + * Stone 2008](https://arxiv.org/abs/0712.2634) for details. + * + * \param P The parameters struct + */ + void MHD_Spherical_Blast(struct Parameters const P); + + /*! + * \brief Initialize the grid with the Orszag-Tang Vortex. See [Gardiner & Stone + * 2008](https://arxiv.org/abs/0712.2634) + * + * \note This initializer takes no parameters.
+ */ + void Orszag_Tang_Vortex(); +#endif // MHD + +#ifdef MPI_CHOLLA + void Set_Boundaries_MPI(struct Parameters P); + void Set_Boundaries_MPI_BLOCK(int *flags, struct Parameters P); + void Load_and_Send_MPI_Comm_Buffers(int dir, int *flags); + void Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags); + void Unload_MPI_Comm_Buffers(int index); + + int Load_Hydro_DeviceBuffer_X0(Real *buffer); + int Load_Hydro_DeviceBuffer_X1(Real *buffer); + int Load_Hydro_DeviceBuffer_Y0(Real *buffer); + int Load_Hydro_DeviceBuffer_Y1(Real *buffer); + int Load_Hydro_DeviceBuffer_Z0(Real *buffer); + int Load_Hydro_DeviceBuffer_Z1(Real *buffer); + + void Unload_Hydro_DeviceBuffer_X0(Real *buffer); + void Unload_Hydro_DeviceBuffer_X1(Real *buffer); + void Unload_Hydro_DeviceBuffer_Y0(Real *buffer); + void Unload_Hydro_DeviceBuffer_Y1(Real *buffer); + void Unload_Hydro_DeviceBuffer_Z0(Real *buffer); + void Unload_Hydro_DeviceBuffer_Z1(Real *buffer); #endif /*MPI_CHOLLA*/ - #ifdef GRAVITY - void Initialize_Gravity( struct parameters *P ); - void Compute_Gravitational_Potential( struct parameters *P ); - void Copy_Hydro_Density_to_Gravity_Function( int g_start, int g_end); +#ifdef GRAVITY + void Initialize_Gravity(struct Parameters *P); + void Compute_Gravitational_Potential(struct Parameters *P); + void Copy_Hydro_Density_to_Gravity_Function(int g_start, int g_end); void Copy_Hydro_Density_to_Gravity(); - void Extrapolate_Grav_Potential_Function( int g_start, int g_end ); + void Extrapolate_Grav_Potential_Function(int g_start, int g_end); void Extrapolate_Grav_Potential(); - void Set_Potential_Boundaries_Periodic( int direction, int side, int *flags ); - int Load_Gravity_Potential_To_Buffer( int direction, int side, Real *buffer, int buffer_start ); - void Unload_Gravity_Potential_from_Buffer( int direction, int side, Real *buffer, int buffer_start ); - void Set_Potential_Boundaries_Isolated( int direction, int side, int *flags ); - void Compute_Potential_Boundaries_Isolated( int 
dir, struct parameters *P ); - void Compute_Potential_Isolated_Boundary( int direction, int side, int bc_potential_type ); + void Set_Potential_Boundaries_Periodic(int direction, int side, int *flags); + int Load_Gravity_Potential_To_Buffer(int direction, int side, Real *buffer, int buffer_start); + void Unload_Gravity_Potential_from_Buffer(int direction, int side, Real *buffer, int buffer_start); + void Set_Potential_Boundaries_Isolated(int direction, int side, int *flags); + void Compute_Potential_Boundaries_Isolated(int dir, struct Parameters *P); + void Compute_Potential_Isolated_Boundary(int direction, int side, int bc_potential_type); #ifdef SOR - void Get_Potential_SOR( Real Grav_Constant, Real dens_avrg, Real current_a, struct parameters *P ); - int Load_Poisson_Boundary_To_Buffer( int direction, int side, Real *buffer ); - void Unload_Poisson_Boundary_From_Buffer( int direction, int side, Real *buffer_host ); + void Get_Potential_SOR(Real Grav_Constant, Real dens_avrg, Real current_a, struct Parameters *P); + int Load_Poisson_Boundary_To_Buffer(int direction, int side, Real *buffer); + void Unload_Poisson_Boundary_From_Buffer(int direction, int side, Real *buffer_host); #endif #ifdef GRAVITY_GPU void Copy_Hydro_Density_to_Gravity_GPU(); void Extrapolate_Grav_Potential_GPU(); - int Load_Gravity_Potential_To_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ); - void Unload_Gravity_Potential_from_Buffer_GPU( int direction, int side, Real *buffer, int buffer_start ); - void Set_Potential_Boundaries_Isolated_GPU( int direction, int side, int *flags ); - void Set_Potential_Boundaries_Periodic_GPU( int direction, int side, int *flags ); + int Load_Gravity_Potential_To_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start); + void Unload_Gravity_Potential_from_Buffer_GPU(int direction, int side, Real *buffer, int buffer_start); + void Set_Potential_Boundaries_Isolated_GPU(int direction, int side, int *flags); + void 
Set_Potential_Boundaries_Periodic_GPU(int direction, int side, int *flags); #endif - #endif//GRAVITY +#endif // GRAVITY - #ifdef GRAVITY_ANALYTIC_COMP - void Add_Analytic_Potential(struct parameters *P); - void Add_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy& gal); - #endif //GRAVITY_ANALYTIC_COMP +#ifdef GRAVITY_ANALYTIC_COMP + void Add_Analytic_Potential(); + void Add_Analytic_Potential(int g_start, int g_end); + void Setup_Analytic_Potential(struct Parameters *P); + void Setup_Analytic_Galaxy_Potential(int g_start, int g_end, DiskGalaxy &gal); + #ifdef GRAVITY_GPU + void Add_Analytic_Potential_GPU(); + #endif +#endif // GRAVITY_ANALYTIC_COMP - #ifdef PARTICLES - void Initialize_Particles( struct parameters *P ); +#ifdef PARTICLES + void Initialize_Particles(struct Parameters *P); void Initialize_Uniform_Particles(); - void Copy_Particles_Density_function( int g_start, int g_end ); + void Copy_Particles_Density_function(int g_start, int g_end); void Copy_Particles_Density(); - void Copy_Particles_Density_to_Gravity(struct parameters P); - void Set_Particles_Density_Boundaries_Periodic( int direction, int side ); - void Transfer_Particles_Boundaries( struct parameters P ); - Real Update_Grid_and_Particles_KDK( struct parameters P ); - void Set_Particles_Boundary( int dir, int side); - void Set_Particles_Open_Boundary(int dir, int side); + void Copy_Particles_Density_to_Gravity(struct Parameters P); + void Set_Particles_Density_Boundaries_Periodic(int direction, int side); + void Transfer_Particles_Boundaries(struct Parameters P); + Real Update_Grid_and_Particles_KDK(struct Parameters P); + void Set_Particles_Boundary(int dir, int side); + #ifdef PARTICLES_CPU + void Set_Particles_Open_Boundary_CPU(int dir, int side); + #endif #ifdef MPI_CHOLLA - int Load_Particles_Density_Boundary_to_Buffer( int direction, int side, Real *buffer ); - void Unload_Particles_Density_Boundary_From_Buffer( int direction, int side, Real *buffer ); - void 
Load_and_Send_Particles_X0( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_X1( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Y0( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Y1( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Z0( int ireq_n_particles, int ireq_particles_transfer ); - void Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particles_transfer ); - void Unload_Particles_from_Buffer_X0( int *flags ); - void Unload_Particles_from_Buffer_X1( int *flags ); - void Unload_Particles_from_Buffer_Y0( int *flags ); - void Unload_Particles_from_Buffer_Y1( int *flags ); - void Unload_Particles_from_Buffer_Z0( int *flags ); - void Unload_Particles_from_Buffer_Z1( int *flags ); + int Load_Particles_Density_Boundary_to_Buffer(int direction, int side, Real *buffer); + void Unload_Particles_Density_Boundary_From_Buffer(int direction, int side, Real *buffer); + void Load_and_Send_Particles_X0(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_X1(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Y0(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Y1(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Z0(int ireq_n_particles, int ireq_particles_transfer); + void Load_and_Send_Particles_Z1(int ireq_n_particles, int ireq_particles_transfer); + void Unload_Particles_from_Buffer_X0(int *flags); + void Unload_Particles_from_Buffer_X1(int *flags); + void Unload_Particles_from_Buffer_Y0(int *flags); + void Unload_Particles_from_Buffer_Y1(int *flags); + void Unload_Particles_from_Buffer_Z0(int *flags); + void Unload_Particles_from_Buffer_Z1(int *flags); void Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(int dir, int *flags); void 
Load_NTtransfer_and_Request_Receive_Particles_Transfer(int index, int *ireq_particles_transfer); void Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags); - void Unload_Particles_From_Buffers_BLOCK(int index, int *flags ); + void Unload_Particles_From_Buffers_BLOCK(int index, int *flags); void Finish_Particles_Transfer(); - #endif//MPI_CHOLLA - void Transfer_Particles_Density_Boundaries( struct parameters P ); - void Copy_Particles_Density_Buffer_Device_to_Host( int direction, int side, Real *buffer_d, Real *buffer_h ); - // void Transfer_Particles_Boundaries( struct parameters P ); - void WriteData_Particles( struct parameters P, int nfile); - void OutputData_Particles( struct parameters P, int nfile); - void Load_Particles_Data( struct parameters P); + #endif // MPI_CHOLLA + void Transfer_Particles_Density_Boundaries(struct Parameters P); + void Copy_Particles_Density_Buffer_Device_to_Host(int direction, int side, Real *buffer_d, Real *buffer_h); + // void Transfer_Particles_Boundaries( struct Parameters P ); + void WriteData_Particles(struct Parameters P, int nfile); + void OutputData_Particles(struct Parameters P, int nfile); + void Load_Particles_Data(struct Parameters P); #ifdef HDF5 - void Write_Particles_Header_HDF5( hid_t file_id); - void Write_Particles_Data_HDF5( hid_t file_id); + void Write_Particles_Header_HDF5(hid_t file_id); + void Write_Particles_Data_HDF5(hid_t file_id); void Load_Particles_Data_HDF5(hid_t file_id, int nfile); - #endif//HDF5 - void Get_Gravity_Field_Particles_function( int g_start, int g_end ); + #endif // HDF5 + void Get_Gravity_Field_Particles_function(int g_start, int g_end); void Get_Gravity_Field_Particles(); - void Get_Gravity_CIC_function( part_int_t p_start, part_int_t p_end ); + void Get_Gravity_CIC_function(part_int_t p_start, part_int_t p_end); void Get_Gravity_CIC(); - void Advance_Particles_KDK_Step1( ); - void Advance_Particles_KDK_Step2( ); - void Advance_Particles_KDK_Step1_function( part_int_t 
p_start, part_int_t p_end ); - void Advance_Particles_KDK_Step2_function( part_int_t p_start, part_int_t p_end ); + void Advance_Particles_KDK_Step1(); + void Advance_Particles_KDK_Step2(); + void Advance_Particles_KDK_Step1_function(part_int_t p_start, part_int_t p_end); + void Advance_Particles_KDK_Step2_function(part_int_t p_start, part_int_t p_end); void Get_Particles_Acceleration(); - void Advance_Particles( int N_KDK_step ); - Real Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end ); + void Advance_Particles(int N_KDK_step); + Real Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end); Real Calc_Particles_dt(); #ifdef PARTICLES_GPU Real Calc_Particles_dt_GPU(); void Advance_Particles_KDK_Step1_GPU(); void Advance_Particles_KDK_Step2_GPU(); - void Set_Particles_Boundary_GPU( int dir, int side); - void Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side ); - #endif//PARTICLES_GPU + void Set_Particles_Boundary_GPU(int dir, int side); + void Set_Particles_Density_Boundaries_Periodic_GPU(int direction, int side); + #endif // PARTICLES_GPU #ifdef GRAVITY_GPU void Copy_Potential_From_GPU(); void Copy_Particles_Density_to_GPU(); void Copy_Particles_Density_GPU(); - int Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ); - void Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ); - #endif//GRAVITY_GPU - #endif//PARTICLES - - #ifdef COSMOLOGY - void Initialize_Cosmology( struct parameters *P ); - void Change_DM_Frame_System( bool forward ); - void Change_GAS_Frame_System( bool forward ); - void Change_GAS_Frame_System_GPU( bool forward ); - void Change_Cosmological_Frame_Sytem( bool forward ); - void Advance_Particles_KDK_Cosmo_Step1_function( part_int_t p_start, part_int_t p_end ); - void Advance_Particles_KDK_Cosmo_Step2_function( part_int_t p_start, part_int_t p_end ); - Real Calc_Particles_dt_Cosmo_function( part_int_t p_start, part_int_t p_end ); 
+ int Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, Real *buffer); + void Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, int side, Real *buffer); + #endif // GRAVITY_GPU +#endif // PARTICLES + +#ifdef COSMOLOGY + void Initialize_Cosmology(struct Parameters *P); + void Change_DM_Frame_System(bool forward); + void Change_GAS_Frame_System(bool forward); + void Change_GAS_Frame_System_GPU(bool forward); + void Change_Cosmological_Frame_Sytem(bool forward); + void Advance_Particles_KDK_Cosmo_Step1_function(part_int_t p_start, part_int_t p_end); + void Advance_Particles_KDK_Cosmo_Step2_function(part_int_t p_start, part_int_t p_end); + Real Calc_Particles_dt_Cosmo_function(part_int_t p_start, part_int_t p_end); Real Calc_Particles_dt_Cosmo(); #ifdef PARTICLES_GPU void Advance_Particles_KDK_Cosmo_Step1_GPU(); void Advance_Particles_KDK_Cosmo_Step2_GPU(); - #endif//PARTICLES_GPU - #endif//COSMOLOGY + #endif // PARTICLES_GPU +#endif // COSMOLOGY - #ifdef COOLING_GRACKLE - void Initialize_Grackle( struct parameters *P ); +#ifdef COOLING_GRACKLE + void Initialize_Grackle(struct Parameters *P); void Allocate_Memory_Grackle(); void Initialize_Fields_Grackle(); - void Copy_Fields_To_Grackle_function( int g_start, int g_end ); + void Copy_Fields_To_Grackle_function(int g_start, int g_end); void Copy_Fields_To_Grackle(); - void Update_Internal_Energy_function( int g_start, int g_end ); + void Update_Internal_Energy_function(int g_start, int g_end); void Update_Internal_Energy(); void Do_Cooling_Step_Grackle(); - #endif +#endif - #ifdef CHEMISTRY_GPU - void Initialize_Chemistry( struct parameters *P ); - void Compute_Gas_Temperature( Real *temperature, bool convert_cosmo_units ); +#ifdef CHEMISTRY_GPU + void Initialize_Chemistry(struct Parameters *P); + void Compute_Gas_Temperature(Real *temperature, bool convert_cosmo_units); void Update_Chemistry(); - #endif +#endif - #ifdef ANALYSIS - void Initialize_Analysis_Module( struct parameters *P 
); - void Compute_and_Output_Analysis( struct parameters *P ); - void Output_Analysis( struct parameters *P ); - void Write_Analysis_Header_HDF5( hid_t file_id ); - void Write_Analysis_Data_HDF5( hid_t file_id ); +#ifdef ANALYSIS + void Initialize_AnalysisModule(struct Parameters *P); + void Compute_and_Output_Analysis(struct Parameters *P); + void Output_Analysis(struct Parameters *P); + void Write_Analysis_Header_HDF5(hid_t file_id); + void Write_Analysis_Data_HDF5(hid_t file_id); #ifdef PHASE_DIAGRAM void Compute_Phase_Diagram(); #endif #ifdef LYA_STATISTICS - void Populate_Lya_Skewers_Local( int axis ); - void Compute_Transmitted_Flux_Skewer( int skewer_id, int axis ); - void Compute_Lya_Statistics( ); - void Compute_Flux_Power_Spectrum_Skewer( int skewer_id, int axis ); - void Initialize_Power_Spectrum_Measurements( int axis ); - #ifdef OUTPUT_SKEWERS - void Output_Skewers_File( struct parameters *P ); - void Write_Skewers_Header_HDF5( hid_t file_id ); - void Write_Skewers_Data_HDF5( hid_t file_id ); - #endif - #endif//LYA_STATISTICS - #endif//ANALYSIS - - #ifdef PARTICLES - #ifdef DE - #ifdef PARTICLE_AGE - void Cluster_Feedback(); - void Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end); - #endif - #endif - #endif - + void Populate_Lya_Skewers_Local(int axis); + void Compute_Transmitted_Flux_Skewer(int skewer_id, int axis); + void Compute_Lya_Statistics(); + void Compute_Flux_Power_Spectrum_Skewer(int skewer_id, int axis); + void Initialize_Power_Spectrum_Measurements(int axis); + #ifdef OUTPUT_SKEWERS + void Output_Skewers_File(struct Parameters *P); + void Write_Skewers_Header_HDF5(hid_t file_id); + void Write_Skewers_Data_HDF5(hid_t file_id); + #endif + #endif // LYA_STATISTICS +#endif // ANALYSIS }; // typedef for Grid3D_PointerMemberFunction typedef void (Grid3D::*Grid3D_PMF_UnloadHydroBuffer)(Real *); -typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential) - (int, int, Real *, int); -typedef void 
(Grid3D::*Grid3D_PMF_UnloadParticleDensity) - (int, int, Real *); +typedef void (Grid3D::*Grid3D_PMF_UnloadGravityPotential)(int, int, Real *, int); +typedef void (Grid3D::*Grid3D_PMF_UnloadParticleDensity)(int, int, Real *); -#endif //GRID3D_H +#endif // GRID3D_H diff --git a/src/grid/grid_enum.h b/src/grid/grid_enum.h new file mode 100644 index 000000000..15e1d604a --- /dev/null +++ b/src/grid/grid_enum.h @@ -0,0 +1,113 @@ +#pragma once + +// An enum which holds offsets for grid quantities +// In the final form of this approach, this file will also set nfields (not yet) +// and NSCALARS (done) so that adding a field only requires registering it here: +// grid knows to allocate memory based on nfields and NSCALARS +// and values can be accessed with density[id + ncells*grid_enum::enum_name] +// example: C.device[id + H.n_cells*grid_enum::basic_scalar] + +// enum notes: +// For advanced devs: must be "unscoped" to be implicitly treated as int: this +// means cannot use "enum class" or "enum struct" Wrapped in namespace to give +// it an effective scope to prevent collisions enum values (i.e. 
density) belong +// to their enclosing scope, which necessitates the namespace wrapping +// --otherwise "density" would be available in global scope +// ": int" forces underlying type to be int + +namespace grid_enum +{ +enum : int { + + // Don't change order of hydro quantities until all of hydro is made + // consistent with grid_enum (if ever) because enum values depend on order + density, + momentum_x, + momentum_y, + momentum_z, + Energy, + + // Code assumes scalars are a contiguous block + // Always define scalar, scalar_minus_1, finalscalar_plus_1, finalscalar to + // compute NSCALARS + scalar, + scalar_minus_1 = scalar - 1, // so that next enum item starts at same index as scalar + +#ifdef SCALAR + // Add scalars here, wrapped appropriately with ifdefs: + #ifdef BASIC_SCALAR + basic_scalar, + #endif + + #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) + HI_density, + HII_density, + HeI_density, + HeII_density, + HeIII_density, + e_density, + #ifdef GRACKLE_METALS + metal_density, + #endif + #endif + + #ifdef DUST + dust_density, + #endif // DUST + +#endif // SCALAR + + finalscalar_plus_1, // needed to calculate NSCALARS + finalscalar = finalscalar_plus_1 - 1, // resets enum to finalscalar so fields afterwards are correct +// so that anything after starts with scalar + NSCALARS + +#ifdef MHD + magnetic_x, + magnetic_y, + magnetic_z, +#endif +#ifdef DE + GasEnergy, +#endif + num_fields, + + // Aliases and manually computed enums + nscalars = finalscalar_plus_1 - scalar, + +#ifdef MHD + num_flux_fields = num_fields - 1, + num_interface_fields = num_fields - 1, +#else + num_flux_fields = num_fields, + num_interface_fields = num_fields, +#endif // MHD + +#ifdef MHD + magnetic_start = magnetic_x, + magnetic_end = magnetic_z, + + ct_elec_x = 0, + ct_elec_y = 1, + ct_elec_z = 2, + + // Note that the direction of the flux, the suffix _? 
indicates the direction + // of the electric field, not the magnetic flux + fluxX_magnetic_z = magnetic_start, + fluxX_magnetic_y = magnetic_start + 1, + fluxY_magnetic_x = magnetic_start, + fluxY_magnetic_z = magnetic_start + 1, + fluxZ_magnetic_y = magnetic_start, + fluxZ_magnetic_x = magnetic_start + 1, + + Q_x_magnetic_y = magnetic_start, + Q_x_magnetic_z = magnetic_start + 1, + Q_y_magnetic_z = magnetic_start, + Q_y_magnetic_x = magnetic_start + 1, + Q_z_magnetic_x = magnetic_start, + Q_z_magnetic_y = magnetic_start + 1 +#endif // MHD + +}; +} // namespace grid_enum + +#define NSCALARS grid_enum::nscalars diff --git a/src/grid/initial_conditions.cpp b/src/grid/initial_conditions.cpp index 1a0a03381..af558be8f 100644 --- a/src/grid/initial_conditions.cpp +++ b/src/grid/initial_conditions.cpp @@ -1,106 +1,112 @@ /*! \file initial_conditions.cpp * \brief Definitions of initial conditions for different tests. - Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. - Functions are members of the Grid3D class. */ + Note that the grid is mapped to 1D as i + (x_dim)*j + + (x_dim*y_dim)*k. Functions are members of the Grid3D class. */ - -#include #include +#include +#include #include #include + +#include +#include +#include +#include + #include "../global/global.h" #include "../grid/grid3D.h" -#include "../mpi/mpi_routines.h" #include "../io/io.h" +#include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" -#include -#include -#include -#include - -using namespace std; - -/*! \fn void Set_Initial_Conditions(parameters P) - * \brief Set the initial conditions based on info in the parameters structure. */ -void Grid3D::Set_Initial_Conditions(parameters P) { - +#include "../utils/hydro_utilities.h" +#include "../utils/math_utilities.h" +#include "../utils/mhd_utilities.h" + +/*! \fn void Set_Initial_Conditions(Parameters P ) + * \brief Set the initial conditions based on info in the parameters structure. 
+ */ +void Grid3D::Set_Initial_Conditions(Parameters P) +{ Set_Domain_Properties(P); Set_Gammas(P.gamma); - if (strcmp(P.init, "Constant")==0) { - Constant(P.rho, P.vx, P.vy, P.vz, P.P, P.Bx, P.By, P.Bz); - } else if (strcmp(P.init, "Sound_Wave")==0) { - Sound_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); - } else if (strcmp(P.init, "Square_Wave")==0) { - Square_Wave(P.rho, P.vx, P.vy, P.vz, P.P, P.A); - } else if (strcmp(P.init, "Riemann")==0) { - Riemann(P.rho_l, P.vx_l, P.vy_l, P.vz_l, P.P_l, P.Bx_l, P.By_l, P.Bz_l, - P.rho_r, P.vx_r, P.vy_r, P.vz_r, P.P_r, P.Bx_r, P.By_r, P.Bz_r, - P.diaph); - } else if (strcmp(P.init, "Shu_Osher")==0) { + if (strcmp(P.init, "Constant") == 0) { + Constant(P); + } else if (strcmp(P.init, "Sound_Wave") == 0) { + Sound_Wave(P); + } else if (strcmp(P.init, "Linear_Wave") == 0) { + Linear_Wave(P); + } else if (strcmp(P.init, "Square_Wave") == 0) { + Square_Wave(P); + } else if (strcmp(P.init, "Riemann") == 0) { + Riemann(P); + } else if (strcmp(P.init, "Shu_Osher") == 0) { Shu_Osher(); - } else if (strcmp(P.init, "Blast_1D")==0) { + } else if (strcmp(P.init, "Blast_1D") == 0) { Blast_1D(); - } else if (strcmp(P.init, "KH")==0) { + } else if (strcmp(P.init, "KH") == 0) { KH(); - } else if (strcmp(P.init, "KH_res_ind")==0) { + } else if (strcmp(P.init, "KH_res_ind") == 0) { KH_res_ind(); - } else if (strcmp(P.init, "Rayleigh_Taylor")==0) { + } else if (strcmp(P.init, "Rayleigh_Taylor") == 0) { Rayleigh_Taylor(); - } else if (strcmp(P.init, "Implosion_2D")==0) { + } else if (strcmp(P.init, "Implosion_2D") == 0) { Implosion_2D(); - } else if (strcmp(P.init, "Gresho")==0) { + } else if (strcmp(P.init, "Gresho") == 0) { Gresho(); - } else if (strcmp(P.init, "Noh_2D")==0) { + } else if (strcmp(P.init, "Noh_2D") == 0) { Noh_2D(); - } else if (strcmp(P.init, "Noh_3D")==0) { + } else if (strcmp(P.init, "Noh_3D") == 0) { Noh_3D(); - } else if (strcmp(P.init, "Disk_2D")==0) { + } else if (strcmp(P.init, "Disk_2D") == 0) { Disk_2D(); - } else if 
(strcmp(P.init, "Disk_3D")==0) { - Disk_3D(P); - } else if (strcmp(P.init, "Disk_3D_particles")==0) { - #ifndef ONLY_PARTICLES + } else if (strcmp(P.init, "Disk_3D") == 0 || strcmp(P.init, "Disk_3D_particles") == 0) { Disk_3D(P); - #else - // Initialize a m hydro grid when only integrating particles - Uniform_Grid(); - #endif - } else if (strcmp(P.init, "Spherical_Overpressure_3D")==0) { + } else if (strcmp(P.init, "Spherical_Overpressure_3D") == 0) { Spherical_Overpressure_3D(); - } else if (strcmp(P.init, "Spherical_Overdensity_3D")==0) { - Spherical_Overdensity_3D(); - } else if (strcmp(P.init, "Clouds")==0) { - Clouds(); - } else if (strcmp(P.init, "Read_Grid")==0) { - #ifndef ONLY_PARTICLES + } else if (strcmp(P.init, "Spherical_Overdensity_3D") == 0) { + Spherical_Overdensity_3D(); + } else if (strcmp(P.init, "Clouds") == 0) { + Clouds(); + } else if (strcmp(P.init, "Read_Grid") == 0) { +#ifndef ONLY_PARTICLES Read_Grid(P); - #else // ONLY_PARTICLES +#else // ONLY_PARTICLES // Initialize a uniform hydro grid when only integrating particles Uniform_Grid(); - #endif // ONLY_PARTICLES - } else if (strcmp(P.init, "Uniform")==0) { +#endif // ONLY_PARTICLES + } else if (strcmp(P.init, "Read_Grid_Cat") == 0) { + Read_Grid_Cat(P); + } else if (strcmp(P.init, "Uniform") == 0) { Uniform_Grid(); - } else if (strcmp(P.init, "Zeldovich_Pancake")==0) { + } else if (strcmp(P.init, "Zeldovich_Pancake") == 0) { Zeldovich_Pancake(P); - } else if (strcmp(P.init, "Chemistry_Test")==0) { + } else if (strcmp(P.init, "Chemistry_Test") == 0) { Chemistry_Test(P); +#ifdef MHD + } else if (strcmp(P.init, "Circularly_Polarized_Alfven_Wave") == 0) { + Circularly_Polarized_Alfven_Wave(P); + } else if (strcmp(P.init, "Advecting_Field_Loop") == 0) { + Advecting_Field_Loop(P); + } else if (strcmp(P.init, "MHD_Spherical_Blast") == 0) { + MHD_Spherical_Blast(P); + } else if (strcmp(P.init, "Orszag_Tang_Vortex") == 0) { + Orszag_Tang_Vortex(); +#endif // MHD } else { - chprintf ("ABORT: %s: 
Unknown initial conditions!\n", P.init); + chprintf("ABORT: %s: Unknown initial conditions!\n", P.init); chexit(-1); } - if ( C.device != NULL ) - { - CudaSafeCall( - cudaMemcpy(C.device, C.density, H.n_fields*H.n_cells*sizeof(Real), - cudaMemcpyHostToDevice) ); - } + if (C.device != NULL) { + GPU_Error_Check(cudaMemcpy(C.device, C.density, H.n_fields * H.n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + } } -/*! \fn void Set_Domain_Properties(struct parameters P) +/*! \fn void Set_Domain_Properties(struct Parameters P) * \brief Set local domain properties */ -void Grid3D::Set_Domain_Properties(struct parameters P) +void Grid3D::Set_Domain_Properties(struct Parameters P) { // Global Boundary Coordinates H.xbound = P.xmin; @@ -113,9 +119,9 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.zdglobal = P.zlen; #ifndef MPI_CHOLLA - Real nx_param = (Real) (H.nx - 2*H.n_ghost); - Real ny_param = (Real) (H.ny - 2*H.n_ghost); - Real nz_param = (Real) (H.nz - 2*H.n_ghost); + Real nx_param = (Real)(H.nx - 2 * H.n_ghost); + Real ny_param = (Real)(H.ny - 2 * H.n_ghost); + Real nz_param = (Real)(H.nz - 2 * H.n_ghost); // Local Boundary Coordinates H.xblocal = H.xbound; @@ -123,13 +129,13 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.zblocal = H.zbound; H.xblocal_max = H.xblocal + P.xlen; - H.yblocal_max = H.yblocal + P.ylen; - H.zblocal_max = H.zblocal + P.zlen; + H.yblocal_max = H.yblocal + P.ylen; + H.zblocal_max = H.zblocal + P.zlen; #else - Real nx_param = (Real) nx_global; - Real ny_param = (Real) ny_global; - Real nz_param = (Real) nz_global; + Real nx_param = (Real)nx_global; + Real ny_param = (Real)ny_global; + Real nz_param = (Real)nz_global; // Local Boundary Coordinates /* @@ -137,47 +143,41 @@ void Grid3D::Set_Domain_Properties(struct parameters P) H.yblocal = H.ybound + P.ylen * ((Real) ny_local_start) / ny_param; H.zblocal = H.zbound + P.zlen * ((Real) nz_local_start) / nz_param; */ - H.xblocal = H.xbound + ((Real) nx_local_start) * 
(P.xlen / nx_param); - H.yblocal = H.ybound + ((Real) ny_local_start) * (P.ylen / ny_param); - H.zblocal = H.zbound + ((Real) nz_local_start) * (P.zlen / nz_param); + H.xblocal = H.xbound + ((Real)nx_local_start) * (P.xlen / nx_param); + H.yblocal = H.ybound + ((Real)ny_local_start) * (P.ylen / ny_param); + H.zblocal = H.zbound + ((Real)nz_local_start) * (P.zlen / nz_param); - H.xblocal_max = H.xbound + ((Real) (nx_local_start + H.nx - 2*H.n_ghost)) * (P.xlen / nx_param); - H.yblocal_max = H.ybound + ((Real) (ny_local_start + H.ny - 2*H.n_ghost)) * (P.ylen / ny_param); - H.zblocal_max = H.zbound + ((Real) (nz_local_start + H.nz - 2*H.n_ghost)) * (P.zlen / nz_param); + H.xblocal_max = H.xbound + ((Real)(nx_local_start + H.nx - 2 * H.n_ghost)) * (P.xlen / nx_param); + H.yblocal_max = H.ybound + ((Real)(ny_local_start + H.ny - 2 * H.n_ghost)) * (P.ylen / ny_param); + H.zblocal_max = H.zbound + ((Real)(nz_local_start + H.nz - 2 * H.n_ghost)) * (P.zlen / nz_param); #endif /*perform 1-D first*/ - if(H.nx > 1 && H.ny==1 && H.nz==1) - { + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { H.dx = P.xlen / nx_param; H.dy = P.ylen; H.dz = P.zlen; } /*perform 2-D next*/ - if(H.nx > 1 && H.ny>1 && H.nz==1) - { + if (H.nx > 1 && H.ny > 1 && H.nz == 1) { H.dx = P.xlen / nx_param; H.dy = P.ylen / ny_param; H.dz = P.zlen; } /*perform 3-D last*/ - if(H.nx>1 && H.ny>1 && H.nz>1) - { + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { H.dx = P.xlen / nx_param; H.dy = P.ylen / ny_param; H.dz = P.zlen / nz_param; - } } - - -/*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) - * \brief Constant gas properties. */ -void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real By, Real Bz) +/*! \fn void Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real + * By, Real Bz) \brief Constant gas properties. 
*/ +void Grid3D::Constant(Parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; @@ -186,297 +186,404 @@ void Grid3D::Constant(Real rho, Real vx, Real vy, Real vz, Real P, Real Bx, Real Real n, T; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart-1; k= kstart) and (j >= jstart) and (i >= istart)) - { + if ((k >= kstart) and (j >= jstart) and (i >= istart)) { // set constant initial states - C.density[id] = rho; - C.momentum_x[id] = rho*vx; - C.momentum_y[id] = rho*vy; - C.momentum_z[id] = rho*vz; - C.Energy[id] = P/(gama-1.0) + 0.5*rho*(vx*vx + vy*vy + vz*vz); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif // DE + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); +#ifdef DE + C.GasEnergy[id] = P.P / (gama - 1.0); +#endif // DE } -/* - if (i==istart && j==jstart && k==kstart) { - n = rho*DENSITY_UNIT / (mu*MP); - T = P*PRESSURE_UNIT / (n*KB); + if (i == istart && j == jstart && k == kstart) { + n = P.rho * DENSITY_UNIT / (mu * MP); + T = P.P * PRESSURE_UNIT / (n * KB); printf("Initial n = %e, T = %e\n", n, T); } -*/ } } } - } - /*! \fn void Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) * \brief Sine wave perturbation. 
*/ -void Grid3D::Sound_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) +void Grid3D::Sound_Wave(Parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; Real x_pos, y_pos, z_pos; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart; k vectorPotential(3 * H.n_cells, 0); + + // // lambda function for computing the vector potential + // auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) { + // // The "_rot" variables are the rotated version + // Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch; + // Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw; + + // Real const a_y = P.Bz * x_rot - (P.A * P.rEigenVec_Bz / wavenumber) * std::cos(wavenumber * x_rot); + // Real const a_z = -P.By * x_rot + (P.A * P.rEigenVec_By / wavenumber) * std::cos(wavenumber * x_rot) + P.Bx * + // y_rot; + + // return std::make_pair(a_y, a_z); + // }; + + // for (size_t k = 0; k < H.nz; k++) { + // for (size_t j = 0; j < H.ny; j++) { + // for (size_t i = 0; i < H.nx; i++) { + // // Get cell index + // size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Real x, y, z; + // Get_Position(i, j, k, &x, &y, &z); + + // auto vectorPot = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.); + // vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw; + + // vectorPot = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.); + // vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw; 
+ + // vectorPot = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z); + // vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch; + // } + // } + // } + + // // Compute the magnetic field from the vector potential + // // ==================================================== + // mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + Real shift = H.dx; + size_t dir = 0; + if (sin_yaw == 1.0) { + shift = H.dy; + dir = 1; + } else if (sin_pitch == 1.0) { + shift = H.dz; + dir = 2; + } + + // set initial values of conserved variables + for (int k = H.n_ghost; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost; i < H.nx - H.n_ghost; i++) { + // get cell index + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch; + + Real const sine_x = std::sin(x_pos_rot * wavenumber); + + Real bx = P.Bx + P.A * P.rEigenVec_Bx * sine_x; + Real by = P.By + P.A * P.rEigenVec_By * sine_x; + Real bz = P.Bz + P.A * P.rEigenVec_Bz * sine_x; + + C.magnetic_x[id] = bx * cos_pitch * cos_yaw - by * sin_yaw - bz * sin_pitch * cos_yaw; + C.magnetic_y[id] = bx * cos_pitch * sin_yaw + by * cos_yaw - bz * sin_pitch * sin_yaw; + C.magnetic_z[id] = bx * sin_pitch + bz * cos_pitch; + } + } + } +#endif // MHD + + // Compute the hydro variables + // =========================== + for (size_t k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (size_t j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (size_t i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const 
x_pos_rot = cos_pitch * (x_pos * cos_yaw + y_pos * sin_yaw) + z_pos * sin_pitch; + + Real const sine_x = std::sin(x_pos_rot * wavenumber); + + // Density + C.density[id] = P.rho + P.A * P.rEigenVec_rho * sine_x; + + // Momenta + Real mx = P.rho * P.vx + P.A * P.rEigenVec_MomentumX * sine_x; + Real my = P.A * P.rEigenVec_MomentumY * sine_x; + Real mz = P.A * P.rEigenVec_MomentumZ * sine_x; + + C.momentum_x[id] = mx * cos_pitch * cos_yaw - my * sin_yaw - mz * sin_pitch * cos_yaw; + C.momentum_y[id] = mx * cos_pitch * sin_yaw + my * cos_yaw - mz * sin_pitch * sin_yaw; + C.momentum_z[id] = mx * sin_pitch + mz * cos_pitch; + + // Energy + C.Energy[id] = P.P / (P.gamma - 1.0) + 0.5 * P.rho * P.vx * P.vx + P.A * sine_x * P.rEigenVec_E; +#ifdef MHD + C.Energy[id] += 0.5 * (P.Bx * P.Bx + P.By * P.By + P.Bz * P.Bz); +#endif // MHD + } + } + } +} /*! \fn void Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) - * \brief Square wave density perturbation with amplitude A*rho in pressure equilibrium. */ -void Grid3D::Square_Wave(Real rho, Real vx, Real vy, Real vz, Real P, Real A) + * \brief Square wave density perturbation with amplitude A*rho in pressure + * equilibrium. 
*/ +void Grid3D::Square_Wave(Parameters const &P) { int i, j, k, id; int istart, jstart, kstart, iend, jend, kend; Real x_pos, y_pos, z_pos; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; if (H.ny > 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart; k 0.25*H.xdglobal && x_pos < 0.75*H.xdglobal) - { - C.density[id] = rho*A; - C.momentum_x[id] = rho*A * vx; - C.momentum_y[id] = rho*A * vy; - C.momentum_z[id] = rho*A * vz; - C.Energy[id] = P/(gama-1.0) + 0.5*rho*A*(vx*vx + vy*vy + vz*vz); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif - #ifdef SCALAR - C.scalar[id] = C.density[id]*1.0; - #endif + C.density[id] = P.rho; + // C.momentum_x[id] = 0.0; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + // C.momentum_z[id] = rho_l * v_l; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); +#ifdef DE + C.GasEnergy[id] = P.P / (gama - 1.0); +#endif +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id] * 0.0; + #endif +#endif + if (x_pos > 0.25 * H.xdglobal && x_pos < 0.75 * H.xdglobal) { + C.density[id] = P.rho * P.A; + C.momentum_x[id] = P.rho * P.A * P.vx; + C.momentum_y[id] = P.rho * P.A * P.vy; + C.momentum_z[id] = P.rho * P.A * P.vz; + C.Energy[id] = P.P / (gama - 1.0) + 0.5 * P.rho * P.A * (P.vx * P.vx + P.vy * P.vy + P.vz * P.vz); +#ifdef DE + C.GasEnergy[id] = P.P / (gama - 1.0); +#endif +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = C.density[id] * 1.0; + #endif +#endif } } } } } - -/*! 
\fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph) +/*! \fn void Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real + Bx_l, Real By_l, Real Bz_l, Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real + P_r, Real Bx_r, Real By_r, Real Bz_r, Real diaph) * \brief Initialize the grid with a Riemann problem. */ -void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real Bx_l, Real By_l, Real Bz_l, - Real rho_r, Real vx_r, Real vy_r, Real vz_r, Real P_r, Real Bx_r, Real By_r, Real Bz_r, - Real diaph) +void Grid3D::Riemann(Parameters const &P) { - int i, j, k, id; - int istart, jstart, kstart, iend, jend, kend; - Real x_pos, y_pos, z_pos; - Real v, P, cs; - - istart = H.n_ghost; - iend = H.nx-H.n_ghost; + size_t const istart = H.n_ghost - 1; + size_t const iend = H.nx - H.n_ghost; + size_t jstart, kstart, jend, kend; if (H.ny > 1) { - jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jstart = H.n_ghost - 1; + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { - kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kstart = H.n_ghost - 1; + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } - #ifdef MHD - auto setMagnetFields = [&] () - { - Real x_pos_face = x_pos + 0.5 * H.dx; - - if (x_pos_face < diaph) - { - C.magnetic_x[id] = Bx_l; - C.magnetic_y[id] = By_l; - C.magnetic_z[id] = Bz_l; - } - else - { - C.magnetic_x[id] = Bx_r; - C.magnetic_y[id] = By_r; - C.magnetic_z[id] = Bz_r; - } - }; - #endif // MHD - // set initial values of conserved variables - for(k=kstart-1; k= kstart) and (j >= jstart) and (i >= istart)) - { - if (x_pos < diaph) - { - C.density[id] = rho_l; - C.momentum_x[id] = rho_l * vx_l; - C.momentum_y[id] = rho_l * vy_l; - C.momentum_z[id] = rho_l * vz_l; - C.Energy[id] = P_l/(gama-1.0) + 0.5*rho_l*(vx_l*vx_l 
+ vy_l*vy_l + vz_l*vz_l); - #ifdef SCALAR - C.scalar[id] = 1.0*rho_l; - #endif //SCALAR - #ifdef DE - C.GasEnergy[id] = P_l/(gama-1.0); - #endif //DE - } - else - { - C.density[id] = rho_r; - C.momentum_x[id] = rho_r * vx_r; - C.momentum_y[id] = rho_r * vy_r; - C.momentum_z[id] = rho_r * vz_r; - C.Energy[id] = P_r/(gama-1.0) + 0.5*rho_r*(vx_r*vx_r + vy_r*vy_r + vz_r*vz_r); - #ifdef SCALAR - C.scalar[id] = 0.0*rho_r; - #endif //SCALAR - #ifdef DE - C.GasEnergy[id] = P_r/(gama-1.0); - #endif //DE + if ((k >= kstart) and (j >= jstart) and (i >= istart)) { + if (x_pos < P.diaph) { + C.density[id] = P.rho_l; + C.momentum_x[id] = P.rho_l * P.vx_l; + C.momentum_y[id] = P.rho_l * P.vy_l; + C.momentum_z[id] = P.rho_l * P.vz_l; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_l, P.rho_l, P.vx_l, P.vy_l, P.vz_l, gama, P.Bx_l, + P.By_l, P.Bz_l); +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0 * P.rho_l; + #endif +#endif // SCALAR +#ifdef DE + C.GasEnergy[id] = P.P_l / (gama - 1.0); +#endif // DE + } else { + C.density[id] = P.rho_r; + C.momentum_x[id] = P.rho_r * P.vx_r; + C.momentum_y[id] = P.rho_r * P.vy_r; + C.momentum_z[id] = P.rho_r * P.vz_r; + C.Energy[id] = hydro_utilities::Calc_Energy_Primitive(P.P_r, P.rho_r, P.vx_r, P.vy_r, P.vz_r, gama, P.Bx_r, + P.By_r, P.Bz_r); +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 0.0 * P.rho_r; + #endif +#endif // SCALAR +#ifdef DE + C.GasEnergy[id] = P.P_r / (gama - 1.0); +#endif // DE } } } @@ -484,9 +591,9 @@ void Grid3D::Riemann(Real rho_l, Real vx_l, Real vy_l, Real vz_l, Real P_l, Real } } - /*! \fn void Shu_Osher() - * \brief Initialize the grid with the Shu-Osher shock tube problem. See Stone 2008, Section 8.1 */ + * \brief Initialize the grid with the Shu-Osher shock tube problem. 
See Stone + * 2008, Section 8.1 */ void Grid3D::Shu_Osher() { int i, id; @@ -494,42 +601,37 @@ void Grid3D::Shu_Osher() Real vx, P; // set initial values of conserved variables - for (i=H.n_ghost; i 0.9) - { - C.density[id] = 1.0; + P = 1000.0; + } else if (x_pos > 0.9) { + C.density[id] = 1.0; C.momentum_x[id] = 0.0; C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; - P = 100; - } - else - { - C.density[id] = 1.0; + P = 100; + } else { + C.density[id] = 1.0; C.momentum_x[id] = 0.0; C.momentum_y[id] = 0.0; C.momentum_z[id] = 0.0; - P = 0.01; + P = 0.01; } - C.Energy[id] = P/(gama-1.0); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif //DE - + C.Energy[id] = P / (gama - 1.0); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE } } - /*! \fn void KH() * \brief Initialize the grid with a Kelvin-Helmholtz instability. This version of KH test has a discontinuous boundary. @@ -591,79 +686,66 @@ void Grid3D::KH() d2 = 1.0; v1 = 0.5; v2 = -0.5; - P = 2.5; - A = 0.1; + P = 2.5; + A = 0.1; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; jstart = H.n_ghost; - jend = H.ny-H.n_ghost; + jend = H.ny - H.n_ghost; if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set the initial values of the conserved variables - for (k=kstart; k= 3.0 * H.ydglobal / 4.0)) { + C.density[id] = d2; + C.momentum_x[id] = v2 * C.density[id]; + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos); C.momentum_z[id] = 0.0; - #ifdef SCALAR - C.scalar[id] = 0.0; - #endif - } - else if (y_pos >= 3.0*H.ydglobal/4.0) - { - C.density[id] = d2; - C.momentum_x[id] = v2*C.density[id]; - C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); - C.momentum_z[id] = 0.0; - - #ifdef SCALAR - C.scalar[id] = 0.0; - #endif - } - // inner half of slab - else - { - C.density[id] = d1; - C.momentum_x[id] = v1*C.density[id]; - C.momentum_y[id] = C.density[id]*A*sin(4*PI*x_pos); +#ifdef SCALAR + 
#ifdef BASIC_SCALAR + C.basic_scalar[id] = 0.0; + #endif +#endif + // inner half of slab + } else { + C.density[id] = d1; + C.momentum_x[id] = v1 * C.density[id]; + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos); C.momentum_z[id] = 0.0; - - #ifdef SCALAR - C.scalar[id] = 1.0*d1; - #endif +#ifdef SCALAR + #ifdef BASIC_SCALAR + C.basic_scalar[id] = 1.0 * d1; + #endif +#endif } - C.Energy[id] = P/(gama-1.0) + 0.5*(C.momentum_x[id]*C.momentum_x[id] + C.momentum_y[id]*C.momentum_y[id])/C.density[id]; - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif //DE - - + C.Energy[id] = + P / (gama - 1.0) + + 0.5 * (C.momentum_x[id] * C.momentum_x[id] + C.momentum_y[id] * C.momentum_y[id]) / C.density[id]; +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE } } } - } - /*! \fn void KH_res_ind() - * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes are resolution independent. */ + * \brief Initialize the grid with a Kelvin-Helmholtz instability whose modes + * are resolution independent. 
*/ void Grid3D::KH_res_ind() { int i, j, k, id; @@ -673,14 +755,13 @@ void Grid3D::KH_res_ind() Real r, yc, zc, phi; Real d1, d2, v1, v2, P, dy, A; istart = H.n_ghost; - iend = H.nx-H.n_ghost; + iend = H.nx - H.n_ghost; jstart = H.n_ghost; - jend = H.ny-H.n_ghost; + jend = H.ny - H.n_ghost; if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } @@ -689,99 +770,114 @@ void Grid3D::KH_res_ind() yc = 0.0; zc = 0.0; - d1 = 100.0; // inner density - d2 = 1.0; // outer density - v1 = 10.5; // inner velocity - v2 = 9.5; // outer velocity - P = 2.5; // pressure - dy = 0.05; // width of ramp function (see Robertson 2009) - A = 0.1; // amplitude of the perturbation + d1 = 100.0; // inner density + d2 = 1.0; // outer density + v1 = 0.5; // inner velocity + v2 = -0.5; // outer velocity + P = 2.5; // pressure + dy = 0.05; // width of ramp function (see Robertson 2009) + A = 0.1; // amplitude of the perturbation - // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 + exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy)); + // Note: ramp function from Robertson 2009 is 1/Ramp(y) = (1 + + // exp(2*(y-0.25)/dy))*(1 + exp(2*(0.75 - y)/dy)); // set the initial values of the conserved variables - for (k=kstart; k 0.5) - { - C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v1*C.density[id] - C.density[id] * (v1-v2) * exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2) /(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.75 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ) ; + // 2D initial conditions: + if (H.nz == 1) { + // inner fluid + if (fabs(y_pos - 0.5) < 0.25) { + if (y_pos > 0.5) { + C.density[id] = + d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v1 * C.density[id] - + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 
0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else { + C.density[id] = + d1 - (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v1 * C.density[id] - + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } } - else - { - C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v1*C.density[id] - C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2) /(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); + // outer fluid + else { + if (y_pos > 0.5) { + C.density[id] = + d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v2 * C.density[id] + + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.75 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else { + C.density[id] = + d2 + (d1 - d2) * exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = v2 * C.density[id] + + C.density[id] * (v1 - v2) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(y_pos - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } } - } - // outer fluid - else - { - if (y_pos > 0.5) + // C.momentum_y[id] = C.density[id] * 
A*sin(4*PI*x_pos); + C.momentum_z[id] = 0.0; + + // 3D initial conditions: + } else { + // cylindrical version (3D only) + r = sqrt((z_pos - zc) * (z_pos - zc) + (y_pos - yc) * (y_pos - yc)); // center the cylinder at yc, zc + phi = atan2((z_pos - zc), (y_pos - yc)); + + if (r < 0.25) // inside the cylinder { - C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v2*C.density[id] + C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.75 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - } - else + C.density[id] = d1 - (d1 - d2) * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = + v1 * C.density[id] - + C.density[id] * exp(-0.5 * pow(r - 0.25 - sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + } else // outside the cylinder { - C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v2*C.density[id] + C.density[id] * (v1 - v2) * exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(y_pos-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); + C.density[id] = d2 + (d1 - d2) * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_x[id] = + v2 * C.density[id] + + C.density[id] * exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy)); + C.momentum_y[id] = cos(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * 
dy * dy * log(0.5)), 2) / (dy * dy))); + C.momentum_z[id] = sin(phi) * C.density[id] * A * sin(4 * M_PI * x_pos) * + (1.0 - exp(-0.5 * pow(r - 0.25 + sqrt(-2.0 * dy * dy * log(0.5)), 2) / (dy * dy))); } - } - //C.momentum_y[id] = C.density[id] * A*sin(4*PI*x_pos); - C.momentum_z[id] = 0.0; - - // cylindrical version (3D only) - r = sqrt((z_pos-zc)*(z_pos-zc) + (y_pos-yc)*(y_pos-yc)); // center the cylinder at yc, zc - phi = atan2((z_pos-zc), (y_pos-yc)); - - if (r < 0.25) // inside the cylinder - { - C.density[id] = d1 - (d1-d2)*exp( -0.5*pow(r-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v1*C.density[id] - C.density[id] * exp( -0.5*pow(r-0.25 - sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = cos(phi) * C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_z[id] = sin(phi) * C.density[id] * A*sin(4*PI*x_pos) * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - } - else // outside the cylinder - { - C.density[id] = d2 + (d1-d2)*exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_x[id] = v2*C.density[id] + C.density[id] * exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) ); - C.momentum_y[id] = cos(phi) * C.density[id] * A*sin(4*PI*x_pos) * (1.0 - exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) )); - C.momentum_z[id] = sin(phi) * C.density[id] * A*sin(4*PI*x_pos) * (1.0 - exp( -0.5*pow(r-0.25 + sqrt(-2.0*dy*dy*log(0.5)),2)/(dy*dy) )); - } - - // No matter what we do with the density and momentum, set the Energy and GasEnergy appropriately - mx = C.momentum_x[id]; - my = C.momentum_y[id]; - mz = C.momentum_z[id]; - C.Energy[id] = P/(gama-1.0) + 0.5*(mx*mx + my*my + mz*mz)/C.density[id]; - - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif // DE - }// i loop - }// j loop - }//k loop + // No matter what we do with the density and momentum, set the Energy + // and GasEnergy appropriately + mx = C.momentum_x[id]; + 
my = C.momentum_y[id]; + mz = C.momentum_z[id]; + C.Energy[id] = P / (gama - 1.0) + 0.5 * (mx * mx + my * my + mz * mz) / C.density[id]; +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE + } // i loop + } // j loop + } // k loop } - - /*! \fn void Rayleigh_Taylor() * \brief Initialize the grid with a 2D Rayleigh-Taylor instability. */ void Grid3D::Rayleigh_Taylor() @@ -791,51 +887,46 @@ void Grid3D::Rayleigh_Taylor() Real dl, du, vy, g, P, P_0; dl = 1.0; du = 2.0; - g = -0.1; + g = -0.1; // set the initial values of the conserved variables - for (j=H.n_ghost; j= 0.2 && r < 0.4) { - vx += -sin(phi)*(2.0-5.0*r) + v_boost; - vy += cos(phi)*(2.0-5.0*r); - P += 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r); - } - else { - vx += 0.0; - vy += 0.0; - P += 3.0 + 4.0*log(2.0); - } - } - vx = vx/N; - vy = vy/N; - P = P/N; -*/ + r = sqrt((x_pos - xc) * (x_pos - xc) + (y_pos - yc) * (y_pos - yc)); + phi = atan2((y_pos - yc), (x_pos - xc)); + + /* + // set vx, vy, P to zero before integrating + vx = 0.0; + vy = 0.0; + P = 0.0; + + // monte carlo sample to get an integrated value for vx, vy, P + for (int ii = 0; ii= 0.2 && r < 0.4) { + vx += -sin(phi)*(2.0-5.0*r) + v_boost; + vy += cos(phi)*(2.0-5.0*r); + P += 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r); + } + else { + vx += 0.0; + vy += 0.0; + P += 3.0 + 4.0*log(2.0); + } + } + vx = vx/N; + vy = vy/N; + P = P/N; + */ if (r < 0.2) { - vx = -sin(phi)*5.0*r + v_boost; - vy = cos(phi)*5.0*r; - P = 5.0 + 0.5*25.0*r*r; - } - else if (r >= 0.2 && r < 0.4) { - vx = -sin(phi)*(2.0-5.0*r) + v_boost; - vy = cos(phi)*(2.0-5.0*r); - P = 9.0 - 4.0*log(0.2) + 0.5*25.0*r*r - 20.0*r + 4.0*log(r); - } - else { + vx = -sin(phi) * 5.0 * r + v_boost; + vy = cos(phi) * 5.0 * r; + P = 5.0 + 0.5 * 25.0 * r * r; + } else if (r >= 0.2 && r < 0.4) { + vx = -sin(phi) * (2.0 - 5.0 * r) + v_boost; + vy = cos(phi) * (2.0 - 5.0 * r); + P = 9.0 - 4.0 * log(0.2) + 0.5 * 25.0 * r * r - 20.0 * r + 4.0 * log(r); + } else { vx = 
0.0; vy = 0.0; - P = 3.0 + 4.0*log(2.0); + P = 3.0 + 4.0 * log(2.0); } // set P constant for modified Gresho problem - //P = 5.5; + // P = 5.5; // set values of conserved variables - C.density[id] = d; - C.momentum_x[id] = d*vx; - C.momentum_y[id] = d*vy; + C.density[id] = d; + C.momentum_x[id] = d * vx; + C.momentum_y[id] = d * vy; C.momentum_z[id] = 0.0; - C.Energy[id] = P/(gama-1.0) + 0.5*d*(vx*vx + vy*vy); - #ifdef DE - C.GasEnergy[id] = P/(gama-1.0); - #endif // DE + C.Energy[id] = P / (gama - 1.0) + 0.5 * d * (vx * vx + vy * vy); +#ifdef DE + C.GasEnergy[id] = P / (gama - 1.0); +#endif // DE - //r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc)); - //printf("%f %f %f %f %f\n", x_pos, y_pos, r, vx, vy); + // r = sqrt((x_pos-xc)*(x_pos-xc) + (y_pos-yc)*(y_pos-yc)); + // printf("%f %f %f %f %f\n", x_pos, y_pos, r, vx, vy); } } - - } - - /*! \fn void Implosion_2D() * \brief Implosion test described in Liska, 2003. */ void Grid3D::Implosion_2D() @@ -951,44 +1036,41 @@ void Grid3D::Implosion_2D() Real x_pos, y_pos, z_pos; Real P; - // set the initial values of the conserved variables - for (j=H.n_ghost; j 1) { jstart = H.n_ghost; - jend = H.ny-H.n_ghost; - } - else { + jend = H.ny - H.n_ghost; + } else { jstart = 0; jend = H.ny; } if (H.nz > 1) { kstart = H.n_ghost; - kend = H.nz-H.n_ghost; - } - else { + kend = H.nz - H.n_ghost; + } else { kstart = 0; kend = H.nz; } // set initial values of conserved variables - for(k=kstart; k= kstart) and (j >= jstart) and (i >= istart)) - { - C.density[id] = 0; + if ((k >= kstart) and (j >= jstart) and (i >= istart)) { + C.density[id] = 0; C.momentum_x[id] = 0; C.momentum_y[id] = 0; C.momentum_z[id] = 0; - C.Energy[id] = 0; + C.Energy[id] = 0; - #ifdef DE +#ifdef DE C.GasEnergy[id] = 0; - #endif +#endif } } } } } -void Grid3D::Zeldovich_Pancake( struct parameters P ){ - - #ifndef COSMOLOGY - chprintf( "To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n" ); +void Grid3D::Zeldovich_Pancake(struct Parameters P) +{ 
+#ifndef COSMOLOGY + chprintf("To run a Zeldovich Pancake COSMOLOGY has to be turned ON \n"); exit(-1); - #else - +#else int i, j, k, id; Real x_pos, y_pos, z_pos; Real H0, h, Omega_M, rho_0, G, z_zeldovich, z_init, x_center, T_init, k_x; chprintf("Setting Zeldovich Pancake initial conditions...\n"); - H0 = P.H0; - h = H0 / 100; + H0 = P.H0; + h = H0 / 100; Omega_M = P.Omega_M; - chprintf( " h = %f \n", h ); - chprintf( " Omega_M = %f \n", Omega_M ); + chprintf(" h = %f \n", h); + chprintf(" Omega_M = %f \n", Omega_M); - H0 /= 1000; //[km/s / kpc] - G = G_COSMO; - rho_0 = 3*H0*H0 / ( 8*M_PI*G ) * Omega_M /h / h; + H0 /= 1000; //[km/s / kpc] + G = G_COSMO; + rho_0 = 3 * H0 * H0 / (8 * M_PI * G) * Omega_M / h / h; z_zeldovich = 1; - z_init = P.Init_redshift; - chprintf( " rho_0 = %f \n", rho_0 ); - chprintf( " z_init = %f \n", z_init ); - chprintf( " z_zeldovich = %f \n", z_zeldovich ); + z_init = P.Init_redshift; + chprintf(" rho_0 = %f \n", rho_0); + chprintf(" z_init = %f \n", z_init); + chprintf(" z_zeldovich = %f \n", z_zeldovich); x_center = H.xdglobal / 2; - chprintf( " Peak Center = %f \n", x_center ); + chprintf(" Peak Center = %f \n", x_center); T_init = 100; - chprintf( " T initial = %f \n", T_init ); - - k_x = 2 * M_PI / H.xdglobal; + chprintf(" T initial = %f \n", T_init); + k_x = 2 * M_PI / H.xdglobal; char filename[100]; // create the filename to read from strcpy(filename, P.indir); strcat(filename, "ics_zeldovich.dat"); - chprintf( " Loading ICs File: %s\n", filename); + chprintf(" Loading ICs File: %s\n", filename); real_vector_t ics_values; - ifstream file_in( filename ); - string line; + std::ifstream file_in(filename); + std::string line; Real ic_val; - if (file_in.is_open()){ - while ( getline (file_in, line) ){ - ic_val = atof( line.c_str() ); - ics_values.push_back( ic_val ); + if (file_in.is_open()) { + while (getline(file_in, line)) { + ic_val = atof(line.c_str()); + ics_values.push_back(ic_val); // chprintf("%f\n", ic_val); } 
file_in.close(); - } - else{ + } else { chprintf(" Error: Unable to open ics zeldovich file\n"); exit(1); } int nPoints = 256; - - Real dens, vel, temp, U, E, gamma; gamma = P.gamma; int index; // set the initial values of the conserved variables - for (k=H.n_ghost; k vectorPotential(3 * H.n_cells, 0); + auto Compute_Vector_Potential = [&](Real const &x_loc, Real const &y_loc, Real const &z_loc) { + // The "_rot" variables are the rotated version + Real const x_rot = x_loc * cos_pitch * cos_yaw + y_loc * cos_pitch * sin_yaw + z_loc * sin_pitch; + Real const y_rot = -x_loc * sin_yaw + y_loc * cos_yaw; + + Real const a_y = P.polarization * (amplitude / wavenumber) * std::sin(wavenumber * x_rot); + Real const a_z = (amplitude / wavenumber) * std::cos(wavenumber * x_rot) + magnetic_x * y_rot; + + return std::make_pair(a_y, a_z); + }; + + for (int k = 0; k < H.nz; k++) { + for (int j = 0; j < H.ny; j++) { + for (int i = 0; i < H.nx; i++) { + // Get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + auto vectorPot = Compute_Vector_Potential(x, y + H.dy / 2., z + H.dz / 2.); + vectorPotential.at(id + 0 * H.n_cells) = -vectorPot.first * sin_yaw - vectorPot.second * sin_pitch * cos_yaw; + + vectorPot = Compute_Vector_Potential(x + H.dx / 2., y, z + H.dz / 2.); + vectorPotential.at(id + 1 * H.n_cells) = vectorPot.first * cos_yaw - vectorPot.second * sin_pitch * sin_yaw; + + vectorPot = Compute_Vector_Potential(x + H.dx / 2., y + H.dy / 2., z); + vectorPotential.at(id + 2 * H.n_cells) = vectorPot.second * cos_pitch; } } } - #else //COSMOLOGY - chprintf( "This requires COSMOLOGY turned on! 
\n"); - chexit(-1); - #endif //COSMOLOGY + // Compute the magnetic field + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + // set initial values of non-magnetic conserved variables + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // get cell-centered position + Real x_pos, y_pos, z_pos; + Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); + Real const x_pos_rot = x_pos * cos_pitch * cos_yaw + y_pos * cos_pitch * sin_yaw + z_pos * sin_pitch; + + // Compute the momentum + Real const momentum_x = density * velocity_x; + Real const momentum_y = -P.polarization * density * amplitude * std::sin(wavenumber * x_pos_rot); + Real const momentum_z = -density * amplitude * std::cos(wavenumber * x_pos_rot); + Real const momentum_x_rot = + momentum_x * cos_pitch * cos_yaw - momentum_y * sin_yaw - momentum_z * sin_pitch * cos_yaw; + Real const momentum_y_rot = + momentum_x * cos_pitch * sin_yaw + momentum_y * cos_yaw - momentum_z * sin_pitch * sin_yaw; + Real const momentum_z_rot = momentum_x * sin_pitch + momentum_z * cos_pitch; + + // Compute the Energy + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + Real const energy = hydro_utilities::Calc_Energy_Conserved(pressure, density, momentum_x_rot, momentum_y_rot, + momentum_z_rot, ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); + + // Final assignment + C.density[id] = density; + C.momentum_x[id] = momentum_x_rot; + C.momentum_y[id] = momentum_y_rot; + C.momentum_z[id] = momentum_z_rot; + C.Energy[id] = energy; + } + } + } +} + +void Grid3D::Advecting_Field_Loop(struct Parameters const P) +{ + // This test is only meaningful for a limited number of parameter values so I will check them 
here + // Check that the domain is centered on zero + assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and + "Domain must be centered at zero"); + + // Check that P.radius is smaller than the size of the domain + Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2); + assert(domain_size > P.radius and "The size of the domain must be greater than P.radius"); + + // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new + // values when required and ignore the cases where I would be assigning zero + std::vector vectorPotential(3 * H.n_cells, 0); + for (int k = 0; k < H.nz; k++) { + for (int j = 0; j < H.ny; j++) { + for (int i = 0; i < H.nx; i++) { + // Get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Y vector potential + Real radius = std::hypot(x + H.dx / 2., y, z + H.dz / 2.); + if (radius < P.radius) { + vectorPotential.at(id + 1 * H.n_cells) = P.A * (P.radius - radius); + } + + // Z vector potential + radius = std::hypot(x + H.dx / 2., y + H.dy / 2., z); + if (radius < P.radius) { + vectorPotential.at(id + 2 * H.n_cells) = P.A * (P.radius - radius); + } + } + } + } + // Initialize the magnetic fields + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + + // Initialize the hydro variables + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Compute the cell centered magnetic fields + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + + // Assignment + C.density[id] = P.rho; + C.momentum_x[id] = 
P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved(P.P, P.rho, C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); + } + } + } } +void Grid3D::MHD_Spherical_Blast(struct Parameters const P) +{ + // This test is only meaningful for a limited number of parameter values so I will check them here + // Check that the domain is centered on zero + assert((P.xmin + P.xlen / 2) == 0 and (P.ymin + P.ylen / 2) == 0 and (P.zmin + P.zlen / 2 == 0) and + "Domain must be centered at zero"); + + // Check that P.radius is smaller than the size of the domain + Real const domain_size = std::hypot(P.xlen / 2, P.ylen / 2, P.zlen / 2); + assert(domain_size > P.radius and "The size of the domain must be greater than P.radius"); + + // Initialize the magnetic field + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + C.magnetic_x[id] = P.Bx; + C.magnetic_y[id] = P.By; + C.magnetic_z[id] = P.Bz; + } + } + } + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Set the fields that don't depend on pressure + C.density[id] = P.rho; + C.momentum_x[id] = P.rho * P.vx; + C.momentum_y[id] = P.rho * P.vy; + C.momentum_z[id] = P.rho * P.vz; + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Compute the magnetic field in this cell + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + + // Set 
the field(s) that do depend on pressure. That's just energy + Real const radius = std::hypot(x, y, z); + Real pressure; + if (radius < P.radius) { + pressure = P.P_blast; + } else { + pressure = P.P; + } + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + pressure, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, magnetic_centered.x, + magnetic_centered.y, magnetic_centered.z); + } + } + } +} +void Grid3D::Orszag_Tang_Vortex() +{ + // This problem requires specific parameters so I will define them here + Real const magnetic_background = 1.0 / std::sqrt(4.0 * M_PI); + Real const density_background = 25.0 / (36.0 * M_PI); + Real const velocity_background = 1.0; + Real const pressure_background = 5.0 / (12.0 * M_PI); + + // Compute the vector potential. Since the vector potential std::vector is initialized to zero I will only assign new + // values when required and ignore the cases where I would be assigning zero + std::vector vectorPotential(3 * H.n_cells, 0); + for (int k = 0; k < H.nz; k++) { + for (int j = 0; j < H.ny; j++) { + for (int i = 0; i < H.nx; i++) { + // Get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // Z vector potential + vectorPotential.at(id + 2 * H.n_cells) = + magnetic_background / (4.0 * M_PI) * (std::cos(4.0 * M_PI * x) + 2.0 * std::cos(2.0 * M_PI * y)); + } + } + } + // Initialize the magnetic fields + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vectorPotential); + // Initialize the hydro variables + for (int k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + for (int j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (int i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + // get cell index + int const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + + // Get the cell centered positions + Real x, y, z; + Get_Position(i, j, k, &x, &y, &z); + + // 
Compute the cell centered magnetic fields + auto const magnetic_centered = + mhd::utils::cellCenteredMagneticFields(C.host, id, i, j, k, H.n_cells, H.nx, H.ny); + + // Assignment + C.density[id] = density_background; + C.momentum_x[id] = density_background * velocity_background * std::sin(2.0 * M_PI * y); + C.momentum_y[id] = -density_background * velocity_background * std::sin(2.0 * M_PI * x); + C.momentum_z[id] = 0.0; + C.Energy[id] = hydro_utilities::Calc_Energy_Conserved( + pressure_background, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], ::gama, + magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + } + } + } +} +#endif // MHD diff --git a/src/grid/mpi_boundaries.cpp b/src/grid/mpi_boundaries.cpp index 2d4c40bf5..747bcd6ec 100644 --- a/src/grid/mpi_boundaries.cpp +++ b/src/grid/mpi_boundaries.cpp @@ -1,47 +1,44 @@ -#include "../grid/grid3D.h" -#include "../mpi/mpi_routines.h" -#include "../io/io.h" -#include "../utils/error_handling.h" #include +#include "../global/global_cuda.h" //provides TPB +#include "../grid/cuda_boundaries.h" // provides PackBuffers3D and UnpackBuffers3D +#include "../io/io.h" +#include "../mpi/mpi_routines.h" +#include "../utils/error_handling.h" #include "../utils/gpu.hpp" -#include "../global/global_cuda.h"//provides TPB -#include "../grid/cuda_boundaries.h"// provides PackBuffers3D and UnpackBuffers3D +#include "grid3D.h" #ifdef MPI_CHOLLA -void Grid3D::Set_Boundaries_MPI(struct parameters P) +void Grid3D::Set_Boundaries_MPI(struct Parameters P) { - int flags[6] = {0,0,0,0,0,0}; + int flags[6] = {0, 0, 0, 0, 0, 0}; - if(Check_Custom_Boundary(&flags[0],P)) - { - //perform custom boundaries + if (Check_Custom_Boundary(&flags[0], P)) { + // perform custom boundaries Custom_Boundary(P.custom_bcnd); } - Set_Boundaries_MPI_BLOCK(flags,P); + Set_Boundaries_MPI_BLOCK(flags, P); #ifdef GRAVITY - Grav.Set_Boundary_Flags( flags ); + Grav.Set_Boundary_Flags(flags); #endif - } -void 
Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) +void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct Parameters P) { #ifdef PARTICLES // Clear the vectors that contain the particles IDs to be transfred - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { Particles.Clear_Particles_For_Transfer(); - Particles.Select_Particles_to_Transfer_All( flags ); + Particles.Select_Particles_to_Transfer_All(flags); } #endif if (H.nx > 1) { - /* Step 1 - Send MPI x-boundaries */ - if (flags[0]==5 || flags[1]==5) { + if (flags[0] == 5 || flags[1] == 5) { Load_and_Send_MPI_Comm_Buffers(0, flags); } @@ -51,20 +48,20 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) /* Step 3 - Receive MPI x-boundaries */ - if (flags[0]==5 || flags[1]==5) { + if (flags[0] == 5 || flags[1] == 5) { Wait_and_Unload_MPI_Comm_Buffers(0, flags); - #ifdef PARTICLES + #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); - #endif + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(0, flags); + } + #endif } - } MPI_Barrier(world); if (H.ny > 1) { - /* Step 4 - Send MPI y-boundaries */ - if (flags[2]==5 || flags[3]==5) { + if (flags[2] == 5 || flags[3] == 5) { Load_and_Send_MPI_Comm_Buffers(1, flags); } @@ -73,19 +70,20 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Set_Boundaries(3, flags); /* Step 6 - Receive MPI y-boundaries */ - if (flags[2]==5 || flags[3]==5) { + if (flags[2] == 5 || flags[3] == 5) { Wait_and_Unload_MPI_Comm_Buffers(1, flags); - #ifdef PARTICLES + #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); - #endif + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + 
Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(1, flags); + } + #endif } } MPI_Barrier(world); if (H.nz > 1) { - /* Step 7 - Send MPI z-boundaries */ - if (flags[4]==5 || flags[5]==5) { + if (flags[4] == 5 || flags[5] == 5) { Load_and_Send_MPI_Comm_Buffers(2, flags); } @@ -94,789 +92,758 @@ void Grid3D::Set_Boundaries_MPI_BLOCK(int *flags, struct parameters P) Set_Boundaries(5, flags); /* Step 9 - Receive MPI z-boundaries */ - if (flags[4]==5 || flags[5]==5) { + if (flags[4] == 5 || flags[5] == 5) { Wait_and_Unload_MPI_Comm_Buffers(2, flags); - #ifdef PARTICLES + #ifdef PARTICLES // Unload Particles buffers when transfering Particles - if (Particles.TRANSFER_PARTICLES_BOUNDARIES) Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); - #endif + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(2, flags); + } + #endif } } #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) Finish_Particles_Transfer(); + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Finish_Particles_Transfer(); + } #endif - } - -int Grid3D::Load_Hydro_DeviceBuffer_X0 ( Real *send_buffer_x0 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_X0(Real *send_buffer_x0) +{ // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.n_ghost; - PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.n_ghost + H.n_ghost*H.nx; - PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.n_ghost + H.n_ghost * H.nx; + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.ny > 1 && H.nz > 1) { - int idxoffset = H.n_ghost + H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny; - 
PackBuffers3D(send_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } return x_buffer_length; } - // load right x communication buffer -int Grid3D::Load_Hydro_DeviceBuffer_X1 ( Real *send_buffer_x1 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_X1(Real *send_buffer_x1) +{ // 1D if (H.ny == 1 && H.nz == 1) { - int idxoffset = H.nx-2*H.n_ghost; - PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + int idxoffset = H.nx - 2 * H.n_ghost; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.nx-2*H.n_ghost + H.n_ghost*H.nx; - PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.ny > 1 && H.nz > 1) { - int idxoffset = H.nx-2*H.n_ghost + H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.nx - 2 * H.n_ghost + H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } return x_buffer_length; } // load left y communication buffer -int Grid3D::Load_Hydro_DeviceBuffer_Y0 ( Real *send_buffer_y0 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Y0(Real *send_buffer_y0) +{ // 2D if (H.nz == 1) { - int idxoffset = H.n_ghost*H.nx; - 
PackBuffers3D(send_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + int idxoffset = H.n_ghost * H.nx; + PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.n_ghost*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } return y_buffer_length; } -int Grid3D::Load_Hydro_DeviceBuffer_Y1 ( Real *send_buffer_y1 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Y1(Real *send_buffer_y1) +{ // 2D if (H.nz == 1) { - int idxoffset = (H.ny-2*H.n_ghost)*H.nx; - PackBuffers3D(send_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx; + PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = (H.ny-2*H.n_ghost)*H.nx + H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = (H.ny - 2 * H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } return y_buffer_length; - } // load left z communication buffer -int Grid3D::Load_Hydro_DeviceBuffer_Z0 ( Real *send_buffer_z0 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Z0(Real *send_buffer_z0) +{ // 3D - int idxoffset = H.n_ghost*H.nx*H.ny; - PackBuffers3D(send_buffer_z0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + int idxoffset = H.n_ghost * H.nx * H.ny; + PackBuffers3D(send_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, 
idxoffset, H.nx, H.ny, H.n_ghost); return z_buffer_length; } -int Grid3D::Load_Hydro_DeviceBuffer_Z1 ( Real *send_buffer_z1 ){ - +int Grid3D::Load_Hydro_DeviceBuffer_Z1(Real *send_buffer_z1) +{ // 3D - int idxoffset = (H.nz-2*H.n_ghost)*H.nx*H.ny; - PackBuffers3D(send_buffer_z1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + int idxoffset = (H.nz - 2 * H.n_ghost) * H.nx * H.ny; + PackBuffers3D(send_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); return z_buffer_length; } -void Grid3D::Unload_Hydro_DeviceBuffer_X0 ( Real *recv_buffer_x0 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_X0(Real *recv_buffer_x0) +{ // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = 0; - UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.n_ghost*H.nx; - UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.n_ghost * H.nx; + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.n_ghost*(H.nx+H.nx*H.ny); - UnpackBuffers3D(recv_buffer_x0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost * (H.nx + H.nx * H.ny); + UnpackBuffers3D(recv_buffer_x0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } - } -void Grid3D::Unload_Hydro_DeviceBuffer_X1 ( Real *recv_buffer_x1 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_X1(Real *recv_buffer_x1) +{ // 1D if (H.ny == 1 && H.nz == 1) { int idxoffset = H.nx - H.n_ghost; - 
UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,1,1); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, 1, 1); } // 2D if (H.ny > 1 && H.nz == 1) { - int idxoffset = H.nx - H.n_ghost + H.n_ghost*H.nx; - UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,1); + int idxoffset = H.nx - H.n_ghost + H.n_ghost * H.nx; + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.nx - H.n_ghost + H.n_ghost*(H.nx+H.nx*H.ny); - UnpackBuffers3D(recv_buffer_x1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.n_ghost,H.ny-2*H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.nx - H.n_ghost + H.n_ghost * (H.nx + H.nx * H.ny); + UnpackBuffers3D(recv_buffer_x1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.n_ghost, + H.ny - 2 * H.n_ghost, H.nz - 2 * H.n_ghost); } - } - -void Grid3D::Unload_Hydro_DeviceBuffer_Y0 ( Real *recv_buffer_y0 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Y0(Real *recv_buffer_y0) +{ // 2D if (H.nz == 1) { int idxoffset = 0; - UnpackBuffers3D(recv_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = H.n_ghost*H.nx*H.ny; - UnpackBuffers3D(recv_buffer_y0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = H.n_ghost * H.nx * H.ny; + UnpackBuffers3D(recv_buffer_y0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } - } - -void Grid3D::Unload_Hydro_DeviceBuffer_Y1 ( Real *recv_buffer_y1 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Y1(Real *recv_buffer_y1) +{ // 2D if (H.nz == 1) { - int idxoffset = (H.ny-H.n_ghost)*H.nx; 
- UnpackBuffers3D(recv_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,1); + int idxoffset = (H.ny - H.n_ghost) * H.nx; + UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, 1); } // 3D if (H.nz > 1) { - int idxoffset = (H.ny-H.n_ghost)*H.nx + H.n_ghost*H.nx*H.ny; - UnpackBuffers3D(recv_buffer_y1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.n_ghost,H.nz-2*H.n_ghost); + int idxoffset = (H.ny - H.n_ghost) * H.nx + H.n_ghost * H.nx * H.ny; + UnpackBuffers3D(recv_buffer_y1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.n_ghost, + H.nz - 2 * H.n_ghost); } - } - - -void Grid3D::Unload_Hydro_DeviceBuffer_Z0 ( Real *recv_buffer_z0 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Z0(Real *recv_buffer_z0) +{ // 3D int idxoffset = 0; - UnpackBuffers3D(recv_buffer_z0,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + UnpackBuffers3D(recv_buffer_z0, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); } - -void Grid3D::Unload_Hydro_DeviceBuffer_Z1 ( Real *recv_buffer_z1 ) { - +void Grid3D::Unload_Hydro_DeviceBuffer_Z1(Real *recv_buffer_z1) +{ // 3D - int idxoffset = (H.nz-H.n_ghost)*H.nx*H.ny; - UnpackBuffers3D(recv_buffer_z1,C.device,H.nx,H.ny,H.n_fields,H.n_cells,idxoffset,H.nx,H.ny,H.n_ghost); + int idxoffset = (H.nz - H.n_ghost) * H.nx * H.ny; + UnpackBuffers3D(recv_buffer_z1, C.device, H.nx, H.ny, H.n_fields, H.n_cells, idxoffset, H.nx, H.ny, H.n_ghost); } void Grid3D::Load_and_Send_MPI_Comm_Buffers(int dir, int *flags) { - #ifdef PARTICLES // Select which particles need to be transfred for this direction - // if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) Particles.Select_Particles_to_Transfer( dir ); + // if ( Particles.TRANSFER_PARTICLES_BOUNDARIES) + // Particles.Select_Particles_to_Transfer( dir ); // Initialize MPI requests for particles transfers int ireq_n_particles, ireq_particles_transfer; - ireq_n_particles = 
0; + ireq_n_particles = 0; ireq_particles_transfer = 0; #endif int ireq; ireq = 0; - int xbsize = x_buffer_length, - ybsize = y_buffer_length, - zbsize = z_buffer_length; + int xbsize = x_buffer_length, ybsize = y_buffer_length, zbsize = z_buffer_length; int buffer_length; - // Flag to omit the transfer of the main buffer when tranferring the particles buffer + // Flag to omit the transfer of the main buffer when tranferring the particles + // buffer bool transfer_main_buffer = true; /* x boundaries */ - if(dir == 0) - { - if (flags[0]==5) { - + if (dir == 0) { + if (flags[0] == 5) { // load left x communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_X0(d_send_buffer_x0); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 0, 0, d_send_buffer_x0, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 0, 0, h_send_buffer_x0, 0 ); - #endif - + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 0, 0, h_send_buffer_x0 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 0, 0, d_send_buffer_x0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, 
h_send_buffer_x0 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 0, h_send_buffer_x0_particles ); - cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(0, 0, d_send_buffer_x0, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = Load_Gravity_Potential_To_Buffer(0, 0, h_send_buffer_x0, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_X0( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(0, 0, h_send_buffer_x0); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(0, 0, d_send_buffer_x0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x0, d_send_buffer_x0, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 0, h_send_buffer_x0_particles); + cudaMemcpy(d_send_buffer_x0, h_send_buffer_x0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_X0(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif + + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive left x communication buffer 
+ MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive left x communication buffer - MPI_Irecv(d_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, - world, &recv_request[ireq]); - - //non-blocking send left x communication buffer - MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, - world, &send_request[0]); - #else - //post non-blocking receive left x communication buffer - MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, - world, &recv_request[ireq]); - - //non-blocking send left x communication buffer - MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, - world, &send_request[0]); - #endif + // non-blocking send left x communication buffer + MPI_Isend(d_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); + #else + // post non-blocking receive left x communication buffer + MPI_Irecv(h_recv_buffer_x0, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request[ireq]); + + // non-blocking send left x communication buffer + MPI_Isend(h_send_buffer_x0, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request[0]); + #endif MPI_Request_free(send_request); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - if(flags[1]==5) - { + if (flags[1] == 5) { // load right x communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_X1(d_send_buffer_x1); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("X1 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 0, 1, d_send_buffer_x1, 0 ); - #ifndef 
MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 0, 1, h_send_buffer_x1, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + // printf("X1 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 0, 1, h_send_buffer_x1 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 0, 1, d_send_buffer_x1 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 0, 1, h_send_buffer_x1_particles ); - cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(0, 1, d_send_buffer_x1, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = Load_Gravity_Potential_To_Buffer(0, 1, h_send_buffer_x1, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_X1( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(0, 1, h_send_buffer_x1); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef 
PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(0, 1, d_send_buffer_x1); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_x1, d_send_buffer_x1, xbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer(0, 1, h_send_buffer_x1_particles); + cudaMemcpy(d_send_buffer_x1, h_send_buffer_x1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_X1(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive right x communication buffer + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive right x communication buffer MPI_Irecv(d_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); - #else - //post non-blocking receive right x communication buffer + // non-blocking send right x communication buffer + MPI_Isend(d_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); + #else + // post non-blocking receive right x communication buffer MPI_Irecv(h_recv_buffer_x1, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, &send_request[1]); - #endif + // non-blocking send right x communication buffer + MPI_Isend(h_send_buffer_x1, buffer_length, MPI_CHREAL, dest[1], 0, world, 
&send_request[1]); + #endif - MPI_Request_free(send_request+1); + MPI_Request_free(send_request + 1); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - // Receive the number of particles transfer for X - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags ); - #endif - + // Receive the number of particles transfer for X + #ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + } + #endif } /* y boundaries */ - if (dir==1) { - if(flags[2] == 5) - { + if (dir == 1) { + if (flags[2] == 5) { // load left y communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Y0(d_send_buffer_y0); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Y0 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 1, 0, d_send_buffer_y0, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 1, 0, h_send_buffer_y0, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + // printf("Y0 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 1, 0, h_send_buffer_y0 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 1, 0, 
d_send_buffer_y0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 0, h_send_buffer_y0_particles ); - cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(1, 0, d_send_buffer_y0, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = Load_Gravity_Potential_To_Buffer(1, 0, h_send_buffer_y0, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Y0( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(1, 0, h_send_buffer_y0); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(1, 0, d_send_buffer_y0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y0, d_send_buffer_y0, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 0, h_send_buffer_y0_particles); + cudaMemcpy(d_send_buffer_y0, h_send_buffer_y0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Y0(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; 
- ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive left y communication buffer + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive left y communication buffer MPI_Irecv(d_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); - //non-blocking send left y communication buffer - MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); - #else - //post non-blocking receive left y communication buffer + // non-blocking send left y communication buffer + MPI_Isend(d_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); + #else + // post non-blocking receive left y communication buffer MPI_Irecv(h_recv_buffer_y0, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request[ireq]); - //non-blocking send left y communication buffer - MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); - #endif + // non-blocking send left y communication buffer + MPI_Isend(h_send_buffer_y0, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request[0]); + #endif MPI_Request_free(send_request); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - if(flags[3]==5) - { + if (flags[3] == 5) { // load right y communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Y1(d_send_buffer_y1); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Y1 len: %d\n", buffer_length); - } - - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 1, 1, d_send_buffer_y1, 
0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 1, 1, h_send_buffer_y1, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + // printf("Y1 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 1, 1, h_send_buffer_y1 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 1, 1, d_send_buffer_y1 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 1, 1, h_send_buffer_y1_particles ); - cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(1, 1, d_send_buffer_y1, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = Load_Gravity_Potential_To_Buffer(1, 1, h_send_buffer_y1, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Y1( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(1, 1, h_send_buffer_y1); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + 
#ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(1, 1, d_send_buffer_y1); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_y1, d_send_buffer_y1, ybsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer(1, 1, h_send_buffer_y1_particles); + cudaMemcpy(d_send_buffer_y1, h_send_buffer_y1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Y1(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive right y communication buffer + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive right y communication buffer MPI_Irecv(d_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); - //non-blocking send right y communication buffer - MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); - #else - //post non-blocking receive right y communication buffer + // non-blocking send right y communication buffer + MPI_Isend(d_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); + #else + // post non-blocking receive right y communication buffer MPI_Irecv(h_recv_buffer_y1, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request[ireq]); - //non-blocking send right y communication buffer - MPI_Isend(h_send_buffer_y1, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); - #endif - MPI_Request_free(send_request+1); + // non-blocking send right y communication buffer + MPI_Isend(h_send_buffer_y1, 
buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request[1]); + #endif + MPI_Request_free(send_request + 1); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - // Receive the number of particles transfer for Y - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags ); - #endif - + // Receive the number of particles transfer for Y + #ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + } + #endif } /* z boundaries */ - if (dir==2) { - - if(flags[4]==5) - { + if (dir == 2) { + if (flags[4] == 5) { // left z communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Z0(d_send_buffer_z0); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Z0 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 2, 0, d_send_buffer_z0, 0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 2, 0, h_send_buffer_z0, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + // printf("Z0 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 2, 0, h_send_buffer_z0 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 2, 0, 
d_send_buffer_z0 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 0, h_send_buffer_z0_particles ); - cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(2, 0, d_send_buffer_z0, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = Load_Gravity_Potential_To_Buffer(2, 0, h_send_buffer_z0, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Z0( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(2, 0, h_send_buffer_z0); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + #ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(2, 0, d_send_buffer_z0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z0, d_send_buffer_z0, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 0, h_send_buffer_z0_particles); + cudaMemcpy(d_send_buffer_z0, h_send_buffer_z0_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Z0(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; 
- ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive left z communication buffer + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive left z communication buffer MPI_Irecv(d_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); - //non-blocking send left z communication buffer - MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); - #else - //post non-blocking receive left z communication buffer + // non-blocking send left z communication buffer + MPI_Isend(d_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); + #else + // post non-blocking receive left z communication buffer MPI_Irecv(h_recv_buffer_z0, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request[ireq]); - //non-blocking send left z communication buffer - MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); - #endif + // non-blocking send left z communication buffer + MPI_Isend(h_send_buffer_z0, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request[0]); + #endif MPI_Request_free(send_request); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } - if(flags[5]==5) - { + if (flags[5] == 5) { // load right z communication buffer - if ( H.TRANSFER_HYDRO_BOUNDARIES ) - { + if (H.TRANSFER_HYDRO_BOUNDARIES) { buffer_length = Load_Hydro_DeviceBuffer_Z1(d_send_buffer_z1); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - //printf("Z1 len: %d\n", buffer_length); - } - - #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ - #ifdef GRAVITY_GPU - buffer_length = Load_Gravity_Potential_To_Buffer_GPU( 2, 1, d_send_buffer_z1, 0 
); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - buffer_length = Load_Gravity_Potential_To_Buffer( 2, 1, h_send_buffer_z1, 0 ); - #endif + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + // printf("Z1 len: %d\n", buffer_length); } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ) buffer_length = Load_Poisson_Boundary_To_Buffer( 2, 1, h_send_buffer_z1 ); - #endif //SOR - #endif //GRAVITY - - #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES) { - #ifdef PARTICLES_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU( 2, 1, d_send_buffer_z1 ); - #ifndef MPI_GPU - cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize*sizeof(Real), - cudaMemcpyDeviceToHost); - #endif - #else - #ifndef MPI_GPU - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1 ); - #else - buffer_length = Load_Particles_Density_Boundary_to_Buffer( 2, 1, h_send_buffer_z1_particles ); - cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length*sizeof(Real), cudaMemcpyHostToDevice); - #endif - #endif + + #ifdef GRAVITY + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { + #ifdef GRAVITY_GPU + buffer_length = Load_Gravity_Potential_To_Buffer_GPU(2, 1, d_send_buffer_z1, 0); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + buffer_length = Load_Gravity_Potential_To_Buffer(2, 1, h_send_buffer_z1, 0); + #endif } - else if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ){ - Load_and_Send_Particles_Z1( ireq_n_particles, ireq_particles_transfer ); + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) + buffer_length = Load_Poisson_Boundary_To_Buffer(2, 1, h_send_buffer_z1); + #endif // SOR + #endif // GRAVITY + + #ifdef PARTICLES + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { + 
#ifdef PARTICLES_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer_GPU(2, 1, d_send_buffer_z1); + #ifndef MPI_GPU + cudaMemcpy(h_send_buffer_z1, d_send_buffer_z1, zbsize * sizeof(Real), cudaMemcpyDeviceToHost); + #endif + #else + #ifndef MPI_GPU + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1); + #else + buffer_length = Load_Particles_Density_Boundary_to_Buffer(2, 1, h_send_buffer_z1_particles); + cudaMemcpy(d_send_buffer_z1, h_send_buffer_z1_particles, buffer_length * sizeof(Real), cudaMemcpyHostToDevice); + #endif + #endif + } else if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Load_and_Send_Particles_Z1(ireq_n_particles, ireq_particles_transfer); transfer_main_buffer = false; - ireq_n_particles ++; - ireq_particles_transfer ++; + ireq_n_particles++; + ireq_particles_transfer++; } - #endif + #endif - if ( transfer_main_buffer ){ - #if defined(MPI_GPU) - //post non-blocking receive right x communication buffer + if (transfer_main_buffer) { + #if defined(MPI_GPU) + // post non-blocking receive right x communication buffer MPI_Irecv(d_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); - #else - //post non-blocking receive right x communication buffer + // non-blocking send right x communication buffer + MPI_Isend(d_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); + #else + // post non-blocking receive right x communication buffer MPI_Irecv(h_recv_buffer_z1, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request[ireq]); - //non-blocking send right x communication buffer - MPI_Isend(h_send_buffer_z1, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); - #endif - MPI_Request_free(send_request+1); + // non-blocking send right x communication buffer + MPI_Isend(h_send_buffer_z1, 
buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request[1]); + #endif + MPI_Request_free(send_request + 1); - //keep track of how many sends and receives are expected + // keep track of how many sends and receives are expected ireq++; } } // Receive the number of particles transfer for Z - #ifdef PARTICLES - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK( dir, flags ); - #endif + #ifdef PARTICLES + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + Wait_NTransfer_and_Request_Recv_Particles_Transfer_BLOCK(dir, flags); + } + #endif } - } void Grid3D::Wait_and_Unload_MPI_Comm_Buffers(int dir, int *flags) { - #ifdef PARTICLES - // If we are transfering the particles buffers we dont need to unload the main buffers - if ( Particles.TRANSFER_PARTICLES_BOUNDARIES ) return; + // If we are transfering the particles buffers we dont need to unload the main + // buffers + if (Particles.TRANSFER_PARTICLES_BOUNDARIES) { + return; + } #endif int iwait; - int index = 0; - int wait_max=0; + int index = 0; + int wait_max = 0; MPI_Status status; - //find out how many recvs we need to wait for - if (dir==0) { - if(flags[0] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[1] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - } - if (dir==1) { - if(flags[2] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[3] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - } - if (dir==2) { - if(flags[4] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[5] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - } - - //wait for any receives to complete - for(iwait=0;iwait*Fptr_Unload_Hydro_Buffer_X0) ( l_recv_buffer_x0 ); break; - case ( 1 ): 
(this->*Fptr_Unload_Hydro_Buffer_X1) ( l_recv_buffer_x1 ); break; - case ( 2 ): (this->*Fptr_Unload_Hydro_Buffer_Y0) ( l_recv_buffer_y0 ); break; - case ( 3 ): (this->*Fptr_Unload_Hydro_Buffer_Y1) ( l_recv_buffer_y1 ); break; - case ( 4 ): (this->*Fptr_Unload_Hydro_Buffer_Z0) ( l_recv_buffer_z0 ); break; - case ( 5 ): (this->*Fptr_Unload_Hydro_Buffer_Z1) ( l_recv_buffer_z1 ); break; + switch (index) { + case (0): + (this->*Fptr_Unload_Hydro_Buffer_X0)(l_recv_buffer_x0); + break; + case (1): + (this->*Fptr_Unload_Hydro_Buffer_X1)(l_recv_buffer_x1); + break; + case (2): + (this->*Fptr_Unload_Hydro_Buffer_Y0)(l_recv_buffer_y0); + break; + case (3): + (this->*Fptr_Unload_Hydro_Buffer_Y1)(l_recv_buffer_y1); + break; + case (4): + (this->*Fptr_Unload_Hydro_Buffer_Z0)(l_recv_buffer_z0); + break; + case (5): + (this->*Fptr_Unload_Hydro_Buffer_Z1)(l_recv_buffer_z1); + break; } } #ifdef GRAVITY - if ( Grav.TRANSFER_POTENTIAL_BOUNDARIES ){ + if (Grav.TRANSFER_POTENTIAL_BOUNDARIES) { #ifdef GRAVITY_GPU - #ifndef MPI_GPU - copyHostToDeviceReceiveBuffer ( index ); - #endif // MPI_GPU + #ifndef MPI_GPU + copyHostToDeviceReceiveBuffer(index); + #endif // MPI_GPU l_recv_buffer_x0 = d_recv_buffer_x0; l_recv_buffer_x1 = d_recv_buffer_x1; @@ -916,8 +894,7 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = d_recv_buffer_z0; l_recv_buffer_z1 = d_recv_buffer_z1; - Fptr_Unload_Gravity_Potential - = &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU; + Fptr_Unload_Gravity_Potential = &Grid3D::Unload_Gravity_Potential_from_Buffer_GPU; #else @@ -928,21 +905,32 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - Fptr_Unload_Gravity_Potential - = &Grid3D::Unload_Gravity_Potential_from_Buffer; + Fptr_Unload_Gravity_Potential = &Grid3D::Unload_Gravity_Potential_from_Buffer; - #endif // GRAVITY_GPU + #endif // GRAVITY_GPU - if ( index == 0 ) (this->*Fptr_Unload_Gravity_Potential)( 0, 0, l_recv_buffer_x0, 
0 ); - if ( index == 1 ) (this->*Fptr_Unload_Gravity_Potential)( 0, 1, l_recv_buffer_x1, 0 ); - if ( index == 2 ) (this->*Fptr_Unload_Gravity_Potential)( 1, 0, l_recv_buffer_y0, 0 ); - if ( index == 3 ) (this->*Fptr_Unload_Gravity_Potential)( 1, 1, l_recv_buffer_y1, 0 ); - if ( index == 4 ) (this->*Fptr_Unload_Gravity_Potential)( 2, 0, l_recv_buffer_z0, 0 ); - if ( index == 5 ) (this->*Fptr_Unload_Gravity_Potential)( 2, 1, l_recv_buffer_z1, 0 ); + if (index == 0) { + (this->*Fptr_Unload_Gravity_Potential)(0, 0, l_recv_buffer_x0, 0); + } + if (index == 1) { + (this->*Fptr_Unload_Gravity_Potential)(0, 1, l_recv_buffer_x1, 0); + } + if (index == 2) { + (this->*Fptr_Unload_Gravity_Potential)(1, 0, l_recv_buffer_y0, 0); + } + if (index == 3) { + (this->*Fptr_Unload_Gravity_Potential)(1, 1, l_recv_buffer_y1, 0); + } + if (index == 4) { + (this->*Fptr_Unload_Gravity_Potential)(2, 0, l_recv_buffer_z0, 0); + } + if (index == 5) { + (this->*Fptr_Unload_Gravity_Potential)(2, 1, l_recv_buffer_z1, 0); + } } - #ifdef SOR - if ( Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES ){ + #ifdef SOR + if (Grav.Poisson_solver.TRANSFER_POISSON_BOUNDARIES) { l_recv_buffer_x0 = h_recv_buffer_x0; l_recv_buffer_x1 = h_recv_buffer_x1; l_recv_buffer_y0 = h_recv_buffer_y0; @@ -950,23 +938,34 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - if ( index == 0 ) Unload_Poisson_Boundary_From_Buffer( 0, 0, l_recv_buffer_x0 ); - if ( index == 1 ) Unload_Poisson_Boundary_From_Buffer( 0, 1, l_recv_buffer_x1 ); - if ( index == 2 ) Unload_Poisson_Boundary_From_Buffer( 1, 0, l_recv_buffer_y0 ); - if ( index == 3 ) Unload_Poisson_Boundary_From_Buffer( 1, 1, l_recv_buffer_y1 ); - if ( index == 4 ) Unload_Poisson_Boundary_From_Buffer( 2, 0, l_recv_buffer_z0 ); - if ( index == 5 ) Unload_Poisson_Boundary_From_Buffer( 2, 1, l_recv_buffer_z1 ); + if (index == 0) { + Unload_Poisson_Boundary_From_Buffer(0, 0, l_recv_buffer_x0); + } + if 
(index == 1) { + Unload_Poisson_Boundary_From_Buffer(0, 1, l_recv_buffer_x1); + } + if (index == 2) { + Unload_Poisson_Boundary_From_Buffer(1, 0, l_recv_buffer_y0); + } + if (index == 3) { + Unload_Poisson_Boundary_From_Buffer(1, 1, l_recv_buffer_y1); + } + if (index == 4) { + Unload_Poisson_Boundary_From_Buffer(2, 0, l_recv_buffer_z0); + } + if (index == 5) { + Unload_Poisson_Boundary_From_Buffer(2, 1, l_recv_buffer_z1); + } } - #endif //SOR - - #endif //GRAVITY + #endif // SOR + #endif // GRAVITY #ifdef PARTICLES - if ( Particles.TRANSFER_DENSITY_BOUNDARIES ){ + if (Particles.TRANSFER_DENSITY_BOUNDARIES) { #ifdef PARTICLES_GPU #ifndef MPI_GPU - copyHostToDeviceReceiveBuffer ( index ); + copyHostToDeviceReceiveBuffer(index); #endif l_recv_buffer_x0 = d_recv_buffer_x0; @@ -976,48 +975,69 @@ void Grid3D::Unload_MPI_Comm_Buffers(int index) l_recv_buffer_z0 = d_recv_buffer_z0; l_recv_buffer_z1 = d_recv_buffer_z1; - Fptr_Unload_Particle_Density - = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; + Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU; #else - - #ifdef MPI_GPU - if ( index == 0 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles ); - if ( index == 1 ) Copy_Particles_Density_Buffer_Device_to_Host( 0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles ); - if ( index == 2 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles ); - if ( index == 3 ) Copy_Particles_Density_Buffer_Device_to_Host( 1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles ); - if ( index == 4 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles ); - if ( index == 5 ) Copy_Particles_Density_Buffer_Device_to_Host( 2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles ); + + #ifdef MPI_GPU + if (index == 0) { + Copy_Particles_Density_Buffer_Device_to_Host(0, 0, d_recv_buffer_x0, h_recv_buffer_x0_particles); + } 
+ if (index == 1) { + Copy_Particles_Density_Buffer_Device_to_Host(0, 1, d_recv_buffer_x1, h_recv_buffer_x1_particles); + } + if (index == 2) { + Copy_Particles_Density_Buffer_Device_to_Host(1, 0, d_recv_buffer_y0, h_recv_buffer_y0_particles); + } + if (index == 3) { + Copy_Particles_Density_Buffer_Device_to_Host(1, 1, d_recv_buffer_y1, h_recv_buffer_y1_particles); + } + if (index == 4) { + Copy_Particles_Density_Buffer_Device_to_Host(2, 0, d_recv_buffer_z0, h_recv_buffer_z0_particles); + } + if (index == 5) { + Copy_Particles_Density_Buffer_Device_to_Host(2, 1, d_recv_buffer_z1, h_recv_buffer_z1_particles); + } l_recv_buffer_x0 = h_recv_buffer_x0_particles; l_recv_buffer_x1 = h_recv_buffer_x1_particles; l_recv_buffer_y0 = h_recv_buffer_y0_particles; l_recv_buffer_y1 = h_recv_buffer_y1_particles; l_recv_buffer_z0 = h_recv_buffer_z0_particles; l_recv_buffer_z1 = h_recv_buffer_z1_particles; - #else + #else l_recv_buffer_x0 = h_recv_buffer_x0; l_recv_buffer_x1 = h_recv_buffer_x1; l_recv_buffer_y0 = h_recv_buffer_y0; l_recv_buffer_y1 = h_recv_buffer_y1; l_recv_buffer_z0 = h_recv_buffer_z0; l_recv_buffer_z1 = h_recv_buffer_z1; - #endif //MPI_GPU - - Fptr_Unload_Particle_Density - = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; + #endif // MPI_GPU - #endif // PARTICLES_GPU + Fptr_Unload_Particle_Density = &Grid3D::Unload_Particles_Density_Boundary_From_Buffer; - if ( index == 0 ) (this->*Fptr_Unload_Particle_Density)( 0, 0, l_recv_buffer_x0 ); - if ( index == 1 ) (this->*Fptr_Unload_Particle_Density)( 0, 1, l_recv_buffer_x1 ); - if ( index == 2 ) (this->*Fptr_Unload_Particle_Density)( 1, 0, l_recv_buffer_y0 ); - if ( index == 3 ) (this->*Fptr_Unload_Particle_Density)( 1, 1, l_recv_buffer_y1 ); - if ( index == 4 ) (this->*Fptr_Unload_Particle_Density)( 2, 0, l_recv_buffer_z0 ); - if ( index == 5 ) (this->*Fptr_Unload_Particle_Density)( 2, 1, l_recv_buffer_z1 ); - } + #endif // PARTICLES_GPU - #endif //PARTICLES + if (index == 0) { + 
(this->*Fptr_Unload_Particle_Density)(0, 0, l_recv_buffer_x0); + } + if (index == 1) { + (this->*Fptr_Unload_Particle_Density)(0, 1, l_recv_buffer_x1); + } + if (index == 2) { + (this->*Fptr_Unload_Particle_Density)(1, 0, l_recv_buffer_y0); + } + if (index == 3) { + (this->*Fptr_Unload_Particle_Density)(1, 1, l_recv_buffer_y1); + } + if (index == 4) { + (this->*Fptr_Unload_Particle_Density)(2, 0, l_recv_buffer_z0); + } + if (index == 5) { + (this->*Fptr_Unload_Particle_Density)(2, 1, l_recv_buffer_z1); + } + } + #endif // PARTICLES } #endif /*MPI_CHOLLA*/ diff --git a/src/h_correction/flux_correction.h b/src/h_correction/flux_correction.h index 15aac55e3..6b1a2e055 100644 --- a/src/h_correction/flux_correction.h +++ b/src/h_correction/flux_correction.h @@ -1,22 +1,30 @@ /*! \file flux_correction.h - * \brief Declarations of functions used in the first-order flux correction method. */ + * \brief Declarations of functions used in the first-order flux correction + * method. */ #ifndef FLUX_CORRECTION_H #define FLUX_CORRECTION_H -void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt); +void Flux_Correction_3D(Real *C1, Real *C2, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, + Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt); void fill_flux_array_pcm(Real *C1, int idl, int idr, Real cW[], int n_cells, int dir); -void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], Real C_imt[], Real C_ipo[], Real C_ipt[], Real C_jmo[], Real C_jmt[], Real C_jpo[], Real C_jpt[], Real C_kmo[], Real C_kmt[], Real C_kpo[], Real C_kpt[], int i, int j, int k, Real dx, Real dy, Real dz, Real dt, int n_fields, int nx, int ny, int nz, int n_cells); +void second_order_fluxes(Real *C1, Real *C2, Real C_i[], Real C_imo[], Real C_imt[], Real C_ipo[], Real C_ipt[], + Real C_jmo[], Real C_jmt[], Real 
C_jpo[], Real C_jpt[], Real C_kmo[], Real C_kmt[], + Real C_kpo[], Real C_kpt[], int i, int j, int k, Real dx, Real dy, Real dz, Real dt, + int n_fields, int nx, int ny, int nz, int n_cells); void average_cell(Real *C1, int i, int j, int k, int nx, int ny, int nz, int n_cells, int n_fields); -void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, int nx, int ny, int nz, int n_cells); +void first_order_fluxes(Real *C1, Real *C2, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, + int nx, int ny, int nz, int n_cells); -void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, int nx, int ny, int nz, int n_cells); +void first_order_update(Real *C1, Real *C_half, int i, int j, int k, Real dtodx, Real dtody, Real dtodz, int nfields, + int nx, int ny, int nz, int n_cells); -void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); +void calc_g_3D(int xid, int yid, int zid, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, + Real xbound, Real ybound, Real zbound, Real *gx, Real *gy, Real *gz); void cooling_CPU(Real *C2, int id, int n_cells, Real dt); @@ -24,4 +32,4 @@ Real Schure_cool_CPU(Real n, Real T); Real Wiersma_cool_CPU(Real n, Real T); -#endif //FLUX_CORRECTION_H +#endif // FLUX_CORRECTION_H diff --git a/src/h_correction/h_correction_2D_cuda.cu b/src/h_correction/h_correction_2D_cuda.cu index f04f7816e..d4e65d7cc 100644 --- a/src/h_correction/h_correction_2D_cuda.cu +++ b/src/h_correction/h_correction_2D_cuda.cu @@ -1,172 +1,168 @@ /*! \file h_correction_2D_cuda.cu * \brief Functions definitions for the H correction kernels. Written following Sanders et al. 1998. 
*/ -#ifdef CUDA -#ifdef H_CORRECTION -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../h_correction/h_correction_2D_cuda.h" +#ifdef H_CORRECTION + #include + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../h_correction/h_correction_2D_cuda.h" + #include "../utils/gpu.hpp" -/*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma) +/*! \fn void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, + int nx, int ny, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ +__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, + Real gamma) { - int n_cells = nx*ny; + int n_cells = nx * ny; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-2 && yid < ny-n_ghost+1) - { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && yid < ny - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id] + - dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id] + - dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_x[id] = 0.5*fabs((dev_bounds_R[n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx; + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells 
+ id] * dev_bounds_L[3 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_x[id] = 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma) +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ +__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, + Real gamma) { - int n_cells = nx*ny; + int n_cells = nx * ny; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1) - { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] + - dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] + - dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_y[id] = 0.5*fabs((dev_bounds_R[2*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[2*n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx; + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + 
dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_y[id] = 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) +/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) { - // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - id = xid + yid*nx; + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { + id = xid + yid * nx; - etah = fmax(eta_y[xid + (yid-1)*nx], eta_y[id]); - etah = fmax(etah, eta_x[id]); - etah = fmax(etah, eta_y[xid+1 + (yid-1)*nx]); - etah = fmax(etah, eta_y[xid+1 + yid*nx]); + etah = fmax(eta_y[xid + (yid - 1) * nx], eta_y[id]); + etah = fmax(etah, eta_x[id]); + etah = fmax(etah, eta_y[xid + 1 + (yid - 1) * nx]); + etah = fmax(etah, eta_y[xid + 1 + yid * nx]); - etah_x[id] = etah; + etah_x[id] = etah; } - } - -/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) +/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) { - // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost) - { - id = xid + yid*nx; + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost) { + id = xid + yid * nx; - etah = fmax(eta_x[xid-1 + yid*nx], eta_x[id]); - etah = fmax(etah, eta_y[id]); - etah = fmax(etah, eta_x[xid-1 + (yid+1)*nx]); - etah = fmax(etah, eta_x[xid + (yid+1)*nx]); + etah = fmax(eta_x[xid - 1 + yid * nx], eta_x[id]); + etah = fmax(etah, eta_y[id]); + etah = fmax(etah, eta_x[xid - 1 + (yid + 1) * nx]); + etah = fmax(etah, eta_x[xid + (yid + 1) * nx]); - etah_y[id] = etah; + etah_y[id] = etah; } - } - - - -#endif //H_CORRECTION -#endif //CUDA +#endif // H_CORRECTION diff --git a/src/h_correction/h_correction_2D_cuda.h b/src/h_correction/h_correction_2D_cuda.h index daa11e39a..9d824cf42 100644 --- a/src/h_correction/h_correction_2D_cuda.h +++ b/src/h_correction/h_correction_2D_cuda.h @@ -1,42 +1,46 @@ /*! \file h_correction_2D_cuda.h * \brief Functions declarations for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA -#ifdef H_CORRECTION -#ifndef H_CORRECTION_2D_H -#define H_CORRECTION_2D_H - -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" - - - -/*! 
\fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, Real gamma); - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, Real gamma); - - -/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost) +#ifdef H_CORRECTION + #ifndef H_CORRECTION_2D_H + #define H_CORRECTION_2D_H + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" + +/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int n_ghost, + Real gamma); + +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ +__global__ void calc_eta_y_2D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int n_ghost, + Real gamma); + +/*! \fn void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ __global__ void calc_etah_x_2D(Real *eta_x, Real *eta_y, Real *etah_x, int nx, int ny, int n_ghost); - -/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost) +/*! \fn void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int + ny, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ __global__ void calc_etah_y_2D(Real *eta_x, Real *eta_y, Real *etah_y, int nx, int ny, int n_ghost); - -#endif //H_CORRECTION_2D_H -#endif //H_CORRECTION -#endif //CUDA + #endif // H_CORRECTION_2D_H +#endif // H_CORRECTION diff --git a/src/h_correction/h_correction_3D_cuda.cu b/src/h_correction/h_correction_3D_cuda.cu index e58632eaf..b3609b529 100644 --- a/src/h_correction/h_correction_3D_cuda.cu +++ b/src/h_correction/h_correction_3D_cuda.cu @@ -1,263 +1,265 @@ /*! \file h_correction_3D_cuda.cu * \brief Functions definitions for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA -#include "../utils/gpu.hpp" #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../h_correction/h_correction_3D_cuda.h" +#include "../utils/gpu.hpp" - - -/*! 
\fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) +/*! \fn void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, + int nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, + Real gamma) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; // x-direction - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-2 && yid < ny-n_ghost+1 && zid > n_ghost-2 && zid < nz-n_ghost+1) - { + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 2 && yid < ny - n_ghost + 1 && zid > n_ghost - 2 && + zid < nz - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx + zid*nx*ny; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id] + - dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - 
dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id] + - dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_x[id] = 0.5*fabs((dev_bounds_R[n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx + zid * nx * ny; + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_x[id] = 0.5 * fabs((dev_bounds_R[n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) +/*! 
\fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, + Real gamma) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; // y-direction - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1 && zid > n_ghost-2 && zid < nz-n_ghost+1) - { + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1 && zid > n_ghost - 2 && + zid < nz - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx + zid*nx*ny; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id] + - dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] + - dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id] + - dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] + - dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_y[id] = 
0.5*fabs((dev_bounds_R[2*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[2*n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx + zid * nx * ny; + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id] + + dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * dev_bounds_L[n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id] + + dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_y[id] = 0.5 * fabs((dev_bounds_R[2 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[2 * n_cells + id] / dev_bounds_L[id] - al)); } - } - -/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) +/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ +__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, + Real gamma) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; // declare primitive variables for each stencil // these will be placed into registers for each thread Real pl, pr, al, ar; // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; // z-direction - if (zid > n_ghost-2 && zid < nz-n_ghost && xid > n_ghost-2 && xid < nx-n_ghost+1 && yid > n_ghost-2 && yid < ny-n_ghost+1) - { + if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 2 && xid < nx - n_ghost + 1 && yid > n_ghost - 2 && + yid < ny - n_ghost + 1) { // load the interface values into registers - id = xid + yid*nx + zid*nx*ny; - pl = (dev_bounds_L[4*n_cells + id] - - 0.5*(dev_bounds_L[3*n_cells+id]*dev_bounds_L[3*n_cells+id] + - dev_bounds_L[ n_cells+id]*dev_bounds_L[ n_cells+id] + - dev_bounds_L[2*n_cells+id]*dev_bounds_L[2*n_cells+id])/dev_bounds_L[id]) * (gamma - 1.0); - pl = fmax(pl, (Real) 1.0e-20); - pr = (dev_bounds_R[4*n_cells + id] - - 0.5*(dev_bounds_R[3*n_cells+id]*dev_bounds_R[3*n_cells+id] + - dev_bounds_R[ n_cells+id]*dev_bounds_R[ n_cells+id] + - dev_bounds_R[2*n_cells+id]*dev_bounds_R[2*n_cells+id])/dev_bounds_R[id]) * (gamma - 1.0); - pr = fmax(pr, (Real) 1.0e-20); - - al = sqrt(gamma*pl/dev_bounds_L[id]); - ar = sqrt(gamma*pl/dev_bounds_R[id]); - - eta_z[id] = 0.5*fabs((dev_bounds_R[3*n_cells+id]/dev_bounds_R[id] + ar) - (dev_bounds_L[3*n_cells+id]/dev_bounds_L[id] - al)); - + id = xid + yid * nx + zid * nx * ny; + pl = (dev_bounds_L[4 * n_cells + id] - 0.5 * + (dev_bounds_L[3 * n_cells + id] * dev_bounds_L[3 * n_cells + id] + + dev_bounds_L[n_cells + id] * 
dev_bounds_L[n_cells + id] + + dev_bounds_L[2 * n_cells + id] * dev_bounds_L[2 * n_cells + id]) / + dev_bounds_L[id]) * + (gamma - 1.0); + pl = fmax(pl, (Real)1.0e-20); + pr = (dev_bounds_R[4 * n_cells + id] - 0.5 * + (dev_bounds_R[3 * n_cells + id] * dev_bounds_R[3 * n_cells + id] + + dev_bounds_R[n_cells + id] * dev_bounds_R[n_cells + id] + + dev_bounds_R[2 * n_cells + id] * dev_bounds_R[2 * n_cells + id]) / + dev_bounds_R[id]) * + (gamma - 1.0); + pr = fmax(pr, (Real)1.0e-20); + + al = sqrt(gamma * pl / dev_bounds_L[id]); + ar = sqrt(gamma * pl / dev_bounds_R[id]); + + eta_z[id] = 0.5 * fabs((dev_bounds_R[3 * n_cells + id] / dev_bounds_R[id] + ar) - + (dev_bounds_L[3 * n_cells + id] / dev_bounds_L[id] - al)); } - } - - -/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ __global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) { - // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; // x-direction - if (xid > n_ghost-2 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - id = xid + yid*nx + zid*nx*ny; + if (xid > n_ghost - 2 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_y[xid + (yid-1)*nx + zid*nx*ny], eta_y[xid+1 + (yid-1)*nx + zid*nx*ny]); + etah = fmax(eta_y[xid + (yid - 1) * nx + zid * nx * ny], eta_y[xid + 1 + (yid - 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_y[id]); - etah = fmax(etah, eta_y[xid+1 + yid*nx + zid*nx*ny]); + etah = fmax(etah, eta_y[xid + 1 + yid * nx + zid * nx * ny]); - etah = fmax(etah, eta_z[xid + yid*nx + (zid-1)*nx*ny]); - etah = fmax(etah, eta_z[xid+1 + yid*nx + (zid-1)*nx*ny]); + etah = fmax(etah, eta_z[xid + yid * nx + (zid - 1) * nx * ny]); + etah = fmax(etah, eta_z[xid + 1 + yid * nx + (zid - 1) * nx * ny]); etah = fmax(etah, eta_z[id]); - etah = fmax(etah, eta_z[xid+1 + yid*nx + zid*nx*ny]); + etah = fmax(etah, eta_z[xid + 1 + yid * nx + zid * nx * ny]); etah = fmax(etah, eta_x[id]); etah_x[id] = etah; - } - } - -/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) +/*! 
\fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ __global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) { - // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; // y-direction - if (yid > n_ghost-2 && yid < ny-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - id = xid + yid*nx + zid*nx*ny; + if (yid > n_ghost - 2 && yid < ny - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_z[xid + yid*nx + (zid-1)*nx*ny], eta_z[xid + (yid+1)*nx + (zid-1)*nx*ny]); + etah = fmax(eta_z[xid + yid * nx + (zid - 1) * nx * ny], eta_z[xid + (yid + 1) * nx + (zid - 1) * nx * ny]); etah = fmax(etah, eta_z[id]); - etah = fmax(etah, eta_z[xid + (yid+1)*nx + zid*nx*ny]); + etah = fmax(etah, eta_z[xid + (yid + 1) * nx + zid * nx * ny]); - etah = fmax(etah, eta_x[xid-1 + yid*nx + zid*nx*ny]); - etah = fmax(etah, eta_x[xid-1 + (yid+1)*nx + zid*nx*ny]); + etah = fmax(etah, eta_x[xid - 1 + yid * nx + zid * nx * ny]); + etah = fmax(etah, eta_x[xid - 1 + (yid + 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_x[id]); - etah = fmax(etah, eta_x[xid + (yid+1)*nx + zid*nx*ny]); + etah = fmax(etah, eta_x[xid + (yid + 1) * nx + zid * nx * ny]); etah = fmax(etah, eta_y[id]); etah_y[id] 
= etah; - } - } - - -/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ __global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) { - // get a thread ID - int tid = threadIdx.x + blockIdx.x*blockDim.x; + int tid = threadIdx.x + blockIdx.x * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; Real etah; // z-direction - if (zid > n_ghost-2 && zid < nz-n_ghost && xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - id = xid + yid*nx + zid*nx*ny; + if (zid > n_ghost - 2 && zid < nz - n_ghost && xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && + yid < ny - n_ghost) { + id = xid + yid * nx + zid * nx * ny; - etah = fmax(eta_x[xid-1 + yid*nx + zid*nx*ny], eta_x[xid-1 + yid*nx + (zid+1)*nx*ny]); + etah = fmax(eta_x[xid - 1 + yid * nx + zid * nx * ny], eta_x[xid - 1 + yid * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_x[id]); - etah = fmax(etah, eta_x[xid + yid*nx + (zid+1)*nx*ny]); + etah = fmax(etah, eta_x[xid + yid * nx + (zid + 1) * nx * ny]); - etah = fmax(etah, eta_y[xid + (yid-1)*nx + zid*nx*ny]); - etah = fmax(etah, eta_y[xid + (yid-1)*nx + (zid+1)*nx*ny]); + etah = fmax(etah, eta_y[xid + (yid - 1) * nx + zid * nx * ny]); + etah = fmax(etah, eta_y[xid + (yid - 1) * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_y[id]); - etah = fmax(etah, 
eta_y[xid + yid*nx + (zid+1)*nx*ny]); + etah = fmax(etah, eta_y[xid + yid * nx + (zid + 1) * nx * ny]); etah = fmax(etah, eta_z[id]); etah_z[id] = etah; - } - } - - -#endif //CUDA diff --git a/src/h_correction/h_correction_3D_cuda.h b/src/h_correction/h_correction_3D_cuda.h index b22041423..c1d2f8a49 100644 --- a/src/h_correction/h_correction_3D_cuda.h +++ b/src/h_correction/h_correction_3D_cuda.h @@ -1,50 +1,59 @@ /*! \file h_correction_3D_cuda.h * \brief Functions declarations for the H correction kernels. Written following Sanders et al. 1998. */ -#ifdef CUDA + #ifndef H_CORRECTION_3D_H #define H_CORRECTION_3D_H -#include "../utils/gpu.hpp" #include "../global/global.h" +#include "../utils/gpu.hpp" - - -/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, Real gamma); - - -/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, Real gamma); - - -/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma) - * \brief When passed the left and right boundary values at an interface, calculates - the eta value for the interface according to the forumulation in Sanders et al, 1998. 
*/ -__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, Real gamma); - - -/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_eta_x(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_x_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_x, int nx, int ny, int nz, int n_ghost, + Real gamma); + +/*! \fn void calc_eta_y(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_y_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_y, int nx, int ny, int nz, int n_ghost, + Real gamma); + +/*! \fn void calc_eta_z(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int + nx, int ny, int nz, int n_ghost, Real gamma) + * \brief When passed the left and right boundary values at an interface, + calculates the eta value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_eta_z_3D(Real *dev_bounds_L, Real *dev_bounds_R, Real *eta_z, int nx, int ny, int nz, int n_ghost, + Real gamma); + +/*! \fn void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. 
*/ -__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, int n_ghost); + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_x_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_x, int nx, int ny, int nz, + int n_ghost); - -/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, int n_ghost); - + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. */ +__global__ void calc_etah_y_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_y, int nx, int ny, int nz, + int n_ghost); -/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost) +/*! \fn void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, + int nx, int ny, int nz, int n_ghost) * \brief When passed the eta values at every interface, calculates - the eta_h value for the interface according to the forumulation in Sanders et al, 1998. */ -__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, int n_ghost); - + the eta_h value for the interface according to the forumulation in + Sanders et al, 1998. 
*/ +__global__ void calc_etah_z_3D(Real *eta_x, Real *eta_y, Real *eta_z, Real *etah_z, int nx, int ny, int nz, + int n_ghost); -#endif //H_CORRECTION_3D_H -#endif //CUDA +#endif // H_CORRECTION_3D_H diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index ee033e334..4d0661fbd 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -1,153 +1,159 @@ /*! \file hydro_cuda.cu * \brief Definitions of functions used in all cuda integration algorithms. */ -#ifdef CUDA -#include -#include #include +#include +#include + +#include -#include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" #include "../gravity/static_grav.h" -#include "../utils/hydro_utilities.h" +#include "../hydro/hydro_cuda.h" +#include "../utils/DeviceVector.h" #include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" #include "../utils/reduction_utilities.h" - -__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav) { int id; - #ifdef STATIC_GRAV +#ifdef STATIC_GRAV Real d, d_inv, vx; Real gx, d_n, d_inv_n, vx_n; gx = 0.0; - #endif +#endif - Real dtodx = dt/dx; + Real dtodx = dt / dx; // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; - // threads corresponding to real cells do the calculation - if (id > n_ghost - 1 && id < n_cells-n_ghost) - { - #ifdef STATIC_GRAV - d = dev_conserved[ id]; + if (id > n_ghost - 1 && id < n_cells - n_ghost) { +#ifdef STATIC_GRAV + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; +#endif // update the conserved 
variable array - dev_conserved[ id] += dtodx * (dev_F[ id-1] - dev_F[ id]); - dev_conserved[ n_cells + id] += dtodx * (dev_F[ n_cells + id-1] - dev_F[ n_cells + id]); - dev_conserved[2*n_cells + id] += dtodx * (dev_F[2*n_cells + id-1] - dev_F[2*n_cells + id]); - dev_conserved[3*n_cells + id] += dtodx * (dev_F[3*n_cells + id-1] - dev_F[3*n_cells + id]); - dev_conserved[4*n_cells + id] += dtodx * (dev_F[4*n_cells + id-1] - dev_F[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - #ifdef STATIC_GRAV - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { +#ifdef STATIC_GRAV + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; +#endif // update the conserved variable array - dev_conserved[ id] += dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]); - dev_conserved[ n_cells + id] += dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]); - dev_conserved[2*n_cells + id] += dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]); - dev_conserved[3*n_cells + id] += dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]); - dev_conserved[4*n_cells + id] += dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - #if defined(STATIC_GRAV) || defined(GRAVITY) - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && 
xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { +#if defined(STATIC_GRAV) || defined(GRAVITY) + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; +#endif // update the conserved variable array - dev_conserved[ id] += dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]) - + dtodz * (dev_F_z[ kmo] - dev_F_z[ id]); - dev_conserved[ n_cells + id] += dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]) - + dtodz * (dev_F_z[ n_cells + kmo] - dev_F_z[ n_cells + id]); - dev_conserved[2*n_cells + id] += dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]) - + dtodz * (dev_F_z[2*n_cells + kmo] - dev_F_z[2*n_cells + id]); - dev_conserved[3*n_cells + id] += dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]) - + dtodz * (dev_F_z[3*n_cells + kmo] - dev_F_z[3*n_cells + id]); - dev_conserved[4*n_cells + id] += dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]) - + dtodz * (dev_F_z[4*n_cells + kmo] - dev_F_z[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i 0){ - dens_0 = dev_conserved[ id]; +#endif +#ifdef DE + dev_conserved[(n_fields - 1) * n_cells + id] += + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + + dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * 
n_cells + id]); + // + 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + + // dtodz*(vz_kmo-vz_kpo)); + // Note: this term is added in a separate kernel to avoid synchronization + // issues +#endif + +#ifdef DENSITY_FLOOR + if (dev_conserved[id] < density_floor) { + if (dev_conserved[id] > 0) { + dens_0 = dev_conserved[id]; // Set the density to the density floor - dev_conserved[ id] = density_floor; + dev_conserved[id] = density_floor; // Scale the conserved values to the new density - dev_conserved[1*n_cells + id] *= (density_floor / dens_0); - dev_conserved[2*n_cells + id] *= (density_floor / dens_0); - dev_conserved[3*n_cells + id] *= (density_floor / dens_0); - dev_conserved[4*n_cells + id] *= (density_floor / dens_0); - #ifdef DE - dev_conserved[(n_fields-1)*n_cells + id] *= (density_floor / dens_0); - #endif - } - else{ + dev_conserved[1 * n_cells + id] *= (density_floor / dens_0); + dev_conserved[2 * n_cells + id] *= (density_floor / dens_0); + dev_conserved[3 * n_cells + id] *= (density_floor / dens_0); + dev_conserved[4 * n_cells + id] *= (density_floor / dens_0); + #ifdef DE + dev_conserved[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); + #endif + } else { // If the density is negative: average the density on that cell - dens_0 = dev_conserved[ id]; - Average_Cell_Single_Field( 0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved ); + dens_0 = dev_conserved[id]; + Average_Cell_Single_Field(0, xid, yid, zid, nx, ny, nz, n_cells, dev_conserved); } } - #endif//DENSITY_FLOOR +#endif // DENSITY_FLOOR - #ifdef STATIC_GRAV - calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, &gz); - d_n = dev_conserved[ id]; +#ifdef STATIC_GRAV + calc_g_3D(xid, yid, zid, x_off, y_off, z_off, n_ghost, custom_grav, dx, dy, dz, xbound, ybound, zbound, &gx, &gy, + &gz); + d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; - vx_n = dev_conserved[1*n_cells + id] * d_inv_n; - vy_n = dev_conserved[2*n_cells + id] * d_inv_n; 
- vz_n = dev_conserved[3*n_cells + id] * d_inv_n; - dev_conserved[ n_cells + id] += 0.5*dt*gx*(d + d_n); - dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n); - dev_conserved[3*n_cells + id] += 0.5*dt*gz*(d + d_n); - dev_conserved[4*n_cells + id] += 0.25*dt*gx*(d + d_n)*(vx + vx_n) - + 0.25*dt*gy*(d + d_n)*(vy + vy_n) - + 0.25*dt*gz*(d + d_n)*(vz + vz_n); - #endif - - #ifdef GRAVITY - d_n = dev_conserved[ id]; + vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; + vy_n = dev_conserved[2 * n_cells + id] * d_inv_n; + vz_n = dev_conserved[3 * n_cells + id] * d_inv_n; + dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); + dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); + dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n); + dev_conserved[4 * n_cells + id] += 0.25 * dt * gx * (d + d_n) * (vx + vx_n) + + 0.25 * dt * gy * (d + d_n) * (vy + vy_n) + + 0.25 * dt * gz * (d + d_n) * (vz + vz_n); +#endif + +#ifdef GRAVITY + d_n = dev_conserved[id]; d_inv_n = 1.0 / d_n; - vx_n = dev_conserved[1*n_cells + id] * d_inv_n; - vy_n = dev_conserved[2*n_cells + id] * d_inv_n; - vz_n = dev_conserved[3*n_cells + id] * d_inv_n; + vx_n = dev_conserved[1 * n_cells + id] * d_inv_n; + vy_n = dev_conserved[2 * n_cells + id] * d_inv_n; + vz_n = dev_conserved[3 * n_cells + id] * d_inv_n; // Calculate the -gradient of potential // Get X componet of gravity field - id_l = (xid-1) + (yid)*nx + (zid)*nx*ny; - id_r = (xid+1) + (yid)*nx + (zid)*nx*ny; + id_l = (xid - 1) + (yid)*nx + (zid)*nx * ny; + id_r = (xid + 1) + (yid)*nx + (zid)*nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (xid-2) + (yid)*nx + (zid)*nx*ny; - id_rr = (xid+2) + (yid)*nx + (zid)*nx*ny; + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (xid - 2) + (yid)*nx + (zid)*nx * ny; + id_rr = (xid + 2) + (yid)*nx + (zid)*nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; - gx = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / 
(12*dx); - #else - gx = -0.5*( pot_r - pot_l ) / dx; - #endif - - //Get Y componet of gravity field - id_l = (xid) + (yid-1)*nx + (zid)*nx*ny; - id_r = (xid) + (yid+1)*nx + (zid)*nx*ny; + gx = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); + #else + gx = -0.5 * (pot_r - pot_l) / dx; + #endif + + // Get Y componet of gravity field + id_l = (xid) + (yid - 1) * nx + (zid)*nx * ny; + id_r = (xid) + (yid + 1) * nx + (zid)*nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (xid) + (yid-2)*nx + (zid)*nx*ny; - id_rr = (xid) + (yid+2)*nx + (zid)*nx*ny; + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (xid) + (yid - 2) * nx + (zid)*nx * ny; + id_rr = (xid) + (yid + 2) * nx + (zid)*nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; - gy = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx); - #else - gy = -0.5*( pot_r - pot_l ) / dy; - #endif - //Get Z componet of gravity field - id_l = (xid) + (yid)*nx + (zid-1)*nx*ny; - id_r = (xid) + (yid)*nx + (zid+1)*nx*ny; + gy = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); + #else + gy = -0.5 * (pot_r - pot_l) / dy; + #endif + // Get Z componet of gravity field + id_l = (xid) + (yid)*nx + (zid - 1) * nx * ny; + id_r = (xid) + (yid)*nx + (zid + 1) * nx * ny; pot_l = dev_potential[id_l]; pot_r = dev_potential[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (xid) + (yid)*nx + (zid-2)*nx*ny; - id_rr = (xid) + (yid)*nx + (zid+2)*nx*ny; + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (xid) + (yid)*nx + (zid - 2) * nx * ny; + id_rr = (xid) + (yid)*nx + (zid + 2) * nx * ny; pot_ll = dev_potential[id_ll]; pot_rr = dev_potential[id_rr]; - gz = -1 * ( -pot_rr + 8*pot_r - 8*pot_l + pot_ll) / (12*dx); - #else - gz = -0.5*( pot_r - pot_l ) / dz; - #endif - - //Add gravity term to Momentum - dev_conserved[ n_cells + id] += 0.5*dt*gx*(d + d_n); - dev_conserved[2*n_cells + id] += 0.5*dt*gy*(d + d_n); - dev_conserved[3*n_cells + id] += 
0.5*dt*gz*(d + d_n); - - //Add gravity term to Total Energy - //Add the work done by the gravitational force - dev_conserved[4*n_cells + id] += 0.5* dt * ( gx*(d*vx + d_n*vx_n) + gy*(d*vy + d_n*vy_n) + gz*(d*vz + d_n*vz_n) ); - - #endif - + gz = -1 * (-pot_rr + 8 * pot_r - 8 * pot_l + pot_ll) / (12 * dx); + #else + gz = -0.5 * (pot_r - pot_l) / dz; + #endif - #if !( defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR) ) - if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4*n_cells + id] < 0.0 || dev_conserved[4*n_cells+id] != dev_conserved[4*n_cells+id]) { - printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid+x_off, yid+y_off, zid+z_off, dev_conserved[id], dtodx*(dev_F_x[imo]-dev_F_x[id]), dtody*(dev_F_y[jmo]-dev_F_y[id]), dtodz*(dev_F_z[kmo]-dev_F_z[id]), dev_conserved[4*n_cells+id]); + // Add gravity term to Momentum + dev_conserved[n_cells + id] += 0.5 * dt * gx * (d + d_n); + dev_conserved[2 * n_cells + id] += 0.5 * dt * gy * (d + d_n); + dev_conserved[3 * n_cells + id] += 0.5 * dt * gz * (d + d_n); + + // Add gravity term to Total Energy + // Add the work done by the gravitational force + dev_conserved[4 * n_cells + id] += + 0.5 * dt * (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + gz * (d * vz + d_n * vz_n)); + +#endif // GRAVITY + +#if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) + if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 || + dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { + printf("%3d %3d %3d Thread crashed in final update. 
%e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, + dev_conserved[id], dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]), + dtodz * (dev_F_z[kmo] - dev_F_z[id]), dev_conserved[4 * n_cells + id]); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); } - #endif//DENSITY_FLOOR +#endif // DENSITY_FLOOR /* d = dev_conserved[ id]; d_inv = 1.0 / d; vx = dev_conserved[1*n_cells + id] * d_inv; vy = dev_conserved[2*n_cells + id] * d_inv; vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - if (P < 0.0) printf("%3d %3d %3d Negative pressure after final update. %f %f %f %f %f\n", xid, yid, zid, dev_conserved[4*n_cells + id], 0.5*d*vx*vx, 0.5*d*vy*vy, 0.5*d*vz*vz, P); + P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * + (gamma - 1.0); if (P < 0.0) printf("%3d %3d %3d Negative pressure after + final update. %f %f %f %f %f\n", xid, yid, zid, dev_conserved[4*n_cells + + id], 0.5*d*vx*vx, 0.5*d*vy*vy, 0.5*d*vz*vz, P); */ } - } - __device__ __host__ Real hydroInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &dx, - Real const &dy, - Real const &dz, - Real const &gamma) +__device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &dx, Real const &dy, + Real const &dz, Real const &gamma) { // Compute pressure and sound speed - Real P = (E - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); + Real P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); Real cs = sqrt(d_inv * gamma * P); // Find maximum inverse crossing time in the cell (i.e. 
minimum crossing time) - Real cellMaxInverseDt = fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy); - cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz)+cs)/dz); + Real cellMaxInverseDt = fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy); + cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz) + cs) / dz); cellMaxInverseDt = fmax(cellMaxInverseDt, 0.0); return cellMaxInverseDt; } -__device__ __host__ Real mhdInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &avgBx, - Real const &avgBy, - Real const &avgBz, - Real const &dx, - Real const &dy, - Real const &dz, +__device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, + Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, Real const &gamma) { // Compute the gas pressure and fast magnetosonic speed - Real gasP = mhdUtils::computeGasPressure(E, d, vx*d, vy*d, vz*d, avgBx, avgBy, avgBz, gamma); - Real cf = mhdUtils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); + Real gasP = hydro_utilities::Calc_Pressure_Primitive(E, d, vx, vy, vz, gamma, avgBx, avgBy, avgBz); + Real cf = mhd::utils::fastMagnetosonicSpeed(d, gasP, avgBx, avgBy, avgBz, gamma); // Find maximum inverse crossing time in the cell (i.e. 
minimum crossing time) - Real cellMaxInverseDt = fmax((fabs(vx)+cf)/dx, (fabs(vy)+cf)/dy); - cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz)+cf)/dz); + Real cellMaxInverseDt = fmax((fabs(vx) + cf) / dx, (fabs(vy) + cf) / dy); + cellMaxInverseDt = fmax(cellMaxInverseDt, (fabs(vz) + cf) / dz); cellMaxInverseDt = fmax(cellMaxInverseDt, 0.0); return cellMaxInverseDt; } - - __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, Real dx) { Real max_dti = -DBL_MAX; @@ -450,401 +446,378 @@ __global__ void Calc_dt_1D(Real *dev_conserved, Real *dev_dti, Real gamma, int n // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // threads corresponding to real cells do the calculation - if (id > n_ghost - 1 && id < n_cells-n_ghost) - { + if (id > n_ghost - 1 && id < n_cells - n_ghost) { // start timestep calculation here - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + // every thread collects the conserved variables it needs from global + // memory + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, (Real) TINY_NUMBER); - // find the max wavespeed in that cell, use it to calculate the inverse timestep - cs = sqrt(d_inv * gamma * P); - max_dti = fmax(max_dti,(fabs(vx)+cs)/dx); + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P 
= (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + P = fmax(P, (Real)TINY_NUMBER); + // find the max wavespeed in that cell, use it to calculate the inverse + // timestep + cs = sqrt(d_inv * gamma * P); + max_dti = fmax(max_dti, (fabs(vx) + cs) / dx); } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } - - -__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx, Real dy) +__global__ void Calc_dt_2D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int nx, int ny, Real dx, + Real dy) { Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, P, cs; int xid, yid, n_cells; - n_cells = nx*ny; + n_cells = nx * ny; // Grid stride loop to perform as much of the reduction as possible. The // fact that `id` has type `size_t` is important. I'm not totally sure why // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. 
Setting it to type // uint or size_t fixes them - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // get a global thread ID yid = id / nx; - xid = id - yid*nx; + xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { + // every thread collects the conserved variables it needs from global + // memory + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, (Real) 1.0e-20); - // find the max wavespeed in that cell, use it to calculate the inverse timestep - cs = sqrt(d_inv * gamma * P); - max_dti = fmax(max_dti,fmax((fabs(vx)+cs)/dx, (fabs(vy)+cs)/dy)); + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + P = fmax(P, (Real)1.0e-20); + // find the max wavespeed in that cell, use it to calculate the inverse + // timestep + cs = sqrt(d_inv * gamma * P); + max_dti = fmax(max_dti, fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy)); } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide 
reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } - -__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, + int nz, Real dx, Real dy, Real dz) { Real max_dti = -DBL_MAX; Real d, d_inv, vx, vy, vz, E; - #ifdef MHD - Real avgBx, avgBy, avgBz; - #endif //MHD int xid, yid, zid, n_cells; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // Grid stride loop to perform as much of the reduction as possible. The // fact that `id` has type `size_t` is important. I'm not totally sure why // but setting it to int results in some kind of silent over/underflow issue // even though we're not hitting those kinds of numbers. Setting it to type // uint or size_t fixes them - for(size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) - { + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { // get a global thread ID cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + // every thread collects the conserved variables it needs from global + // memory + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - #ifdef MHD - // Compute the cell 
centered magnetic field using a straight average of - // the faces - mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); - #endif //MHD - - // Compute the maximum inverse crossing time in the cell - #ifdef MHD - max_dti = fmax(max_dti,mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); - #else // not MHD - max_dti = fmax(max_dti,hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); - #endif //MHD - + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + +// Compute the maximum inverse crossing time in the cell +#ifdef MHD + // Compute the cell centered magnetic field using a straight average of + // the faces + auto const [avgBx, avgBy, avgBz] = + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + max_dti = fmax(max_dti, mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma)); +#else // not MHD + max_dti = fmax(max_dti, hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma)); +#endif // MHD } } - // do the block wide reduction (find the max inverse timestep in the block) - // then write it to that block's location in the dev_dti array - max_dti = reduction_utilities::blockReduceMax(max_dti); - if (threadIdx.x == 0) dev_dti[blockIdx.x] = max_dti; + // do the grid wide reduction (find the max inverse timestep in the grid) + reduction_utilities::gridReduceMax(max_dti, dev_dti); } -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ) +Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, + Real gamma) { - // set values for GPU kernels - uint threadsPerBlock, numBlocks; - int ngrid = (nx*ny*nz + TPB - 1 )/TPB; 
- // reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); // Uncomment this if we fix the AtomicDouble bug - Alwin - threadsPerBlock = TPB; - numBlocks = ngrid; - - Real* dev_dti = dev_dti_array; + // Allocate the device memory + cuda_utilities::DeviceVector static dev_dti(1); + // Set the device side inverse time step to the smallest possible double so + // that the reduction isn't using the maximum value of the previous iteration + dev_dti.assign(std::numeric_limits::lowest()); // compute dt and store in dev_dti - if (nx > 1 && ny == 1 && nz == 1) //1D + if (nx > 1 && ny == 1 && nz == 1) // 1D { - hipLaunchKernelGGL(Calc_dt_1D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, dx); - } - else if (nx > 1 && ny > 1 && nz == 1) //2D + // set launch parameters for GPU kernels. + cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_1D); + hipLaunchKernelGGL(Calc_dt_1D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, + dev_dti.data(), gamma, n_ghost, nx, dx); + } else if (nx > 1 && ny > 1 && nz == 1) // 2D { - hipLaunchKernelGGL(Calc_dt_2D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, nx, ny, dx, dy); - } - else if (nx > 1 && ny > 1 && nz > 1) //3D + // set launch parameters for GPU kernels. 
+ cuda_utilities::AutomaticLaunchParams static const launchParams(Calc_dt_2D); + hipLaunchKernelGGL(Calc_dt_2D, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, dev_conserved, + dev_dti.data(), gamma, n_ghost, nx, ny, dx, dy); + } else if (nx > 1 && ny > 1 && nz > 1) // 3D { - hipLaunchKernelGGL(Calc_dt_3D, numBlocks, threadsPerBlock, 0, 0, dev_conserved, dev_dti, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); - } - CudaCheckError(); - - Real max_dti=0; - - /* Uncomment the below if we fix the AtomicDouble bug - Alwin - // copy device side max_dti to host side max_dti - - - CudaSafeCall( cudaMemcpy(&max_dti, dev_dti, sizeof(Real), cudaMemcpyDeviceToHost) ); - cudaDeviceSynchronize(); - - return max_dti; - */ - - int dev_dti_length = numBlocks; - CudaSafeCall(cudaMemcpy(host_dti_array,dev_dti, dev_dti_length*sizeof(Real), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i=0;i 1 && ny > 1 && nz > 1){ //3D - hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, dx, dy, dz, gamma, max_dti_slow ); + if (nx > 1 && ny > 1 && nz > 1) { // 3D + hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, + dx, dy, dz, gamma, max_dti_slow); } } -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ){ - +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, + Real dy, Real dz, Real gamma, Real max_dti_slow) +{ int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, max_dti; - #ifdef MHD - Real avgBx, avgBy, avgBz; - #endif //MHD + Real speed, temp, P, cs; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - n_cells = nx*ny*nz; + id = threadIdx.x + blockIdx.x * blockDim.x; + n_cells = nx * ny * nz; 
cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - - #ifdef MHD - // Compute the cell centered magnetic field using a straight average of the faces - mhdUtils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny, avgBx, avgBy, avgBz); - #endif //MHD + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; // Compute the maximum inverse crossing time in the cell - #ifdef MHD - max_dti = mhdInverseCrossingTime(E, d, d_inv, vx, vy, vz, avgBx, avgBy, avgBz, dx, dy, dz, gamma); - #else // not MHD - max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); - #endif //MHD + max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); - if (max_dti > max_dti_slow){ + if (max_dti > max_dti_slow) { + speed = sqrt(vx * vx + vy * vy + vz * vz); + temp = (gamma - 1) * (E - 0.5 * (speed * speed) * d) * ENERGY_UNIT / (d * DENSITY_UNIT / 0.6 / MP) / KB; + P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + cs = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5; // Average this cell - printf(" Average Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f\n", xid, yid, zid, 1./max_dti, 1./max_dti_slow ); - Average_Cell_All_Fields( xid, yid, zid, nx, ny, nz, n_cells, n_fields, dev_conserved ); + kernel_printf( + " Average 
Slow Cell [ %d %d %d ] -> dt_cell=%f dt_min=%f, n=%.3e, " + "T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", + xid, yid, zid, 1. / max_dti, 1. / max_dti_slow, dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, + speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5, + cs); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); } } } -#endif //AVERAGE_SLOW_CELLS - +#endif // AVERAGE_SLOW_CELLS #ifdef DE -__global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ){ - +__global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, + int n_ghost, Real dx, Real dt, Real gamma, int n_fields) +{ int id, xid, n_cells; int imo, ipo; Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo; - Real P, E, E_kin, GE; + Real P, E, E_kin, GE; - - Real dtodx = dt/dx; - n_cells = nx; + Real dtodx = dt / dx; + n_cells = nx; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; + id = threadIdx.x + blockIdx.x * blockDim.x; xid = id; - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - - imo = xid-1; - ipo = xid+1; - - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; + vx = dev_conserved[1 * 
n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + + imo = xid - 1; + ipo = xid + 1; + + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo)); - + dev_conserved[(n_fields - 1) * n_cells + id] += 0.5 * P * (dtodx * (vx_imo - vx_ipo)); } } - -__global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields ){ - +__global__ void Partial_Update_Advected_Internal_Energy_2D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields) +{ int id, xid, yid, n_cells; int imo, jmo; int ipo, jpo; Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo; - Real P, E, E_kin, GE; + Real P, E, E_kin, GE; - - Real dtodx = dt/dx; - Real dtody = dt/dy; - n_cells = nx*ny; + Real dtodx = dt / dx; + Real dtody = dt / dy; + n_cells = nx * ny; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { - d = dev_conserved[ id]; + 
if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - - imo = xid-1 + yid*nx; - ipo = xid+1 + yid*nx; - jmo = xid + (yid-1)*nx; - jpo = xid + (yid+1)*nx; - - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + E_kin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + + imo = xid - 1 + yid * nx; + ipo = xid + 1 + yid * nx; + jmo = xid + (yid - 1) * nx; + jpo = xid + (yid + 1) * nx; + + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo)); - + dev_conserved[(n_fields - 1) * n_cells + id] += 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo)); } } -__global__ void 
Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields ){ - +__global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, + int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, + int n_fields) +{ int id, xid, yid, zid, n_cells; int imo, jmo, kmo; int ipo, jpo, kpo; Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo; - Real P, E, E_kin, GE; + Real P, E, E_kin, GE; // Real vx_L, vx_R, vy_L, vy_R, vz_L, vz_R; - - Real dtodx = dt/dx; - Real dtody = dt/dy; - Real dtodz = dt/dz; - n_cells = nx*ny*nz; + Real dtodx = dt / dx; + Real dtody = dt / dy; + Real dtodz = dt / dz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, 
(Real) TINY_NUMBER); - - imo = xid-1 + yid*nx + zid*nx*ny; - jmo = xid + (yid-1)*nx + zid*nx*ny; - kmo = xid + yid*nx + (zid-1)*nx*ny; - - ipo = xid+1 + yid*nx + zid*nx*ny; - jpo = xid + (yid+1)*nx + zid*nx*ny; - kpo = xid + yid*nx + (zid+1)*nx*ny; - - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; - vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo]; - vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + // Add the magnetic energy + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + #endif // MHD + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + + imo = xid - 1 + yid * nx + zid * nx * ny; + jmo = xid + (yid - 1) * nx + zid * nx * ny; + kmo = xid + yid * nx + (zid - 1) * nx * ny; + + ipo = xid + 1 + yid * nx + zid * nx * ny; + jpo = xid + (yid + 1) * nx + zid * nx * ny; + kpo = xid + yid * nx + (zid + 1) * nx * ny; + + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; + vz_kmo = dev_conserved[3 * n_cells + kmo] / dev_conserved[kmo]; + vz_kpo = dev_conserved[3 * n_cells 
+ kpo] / dev_conserved[kpo]; // Use center values of neighbor cells for the divergence of velocity - dev_conserved[(n_fields-1)*n_cells + id] += 0.5*P*(dtodx*(vx_imo-vx_ipo) + dtody*(vy_jmo-vy_jpo) + dtodz*(vz_kmo-vz_kpo)); + dev_conserved[(n_fields - 1) * n_cells + id] += + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo)); - // OPTION 2: Use the reconstructed velocities to compute the velocity gradient - //Use the reconstructed Velocities instead of neighbor cells centered values + // OPTION 2: Use the reconstructed velocities to compute the velocity + // gradient + // Use the reconstructed Velocities instead of neighbor cells centered + // values // vx_R = Q_Lx[1*n_cells + id] / Q_Lx[id]; // vx_L = Q_Rx[1*n_cells + imo] / Q_Rx[imo]; // vy_R = Q_Ly[2*n_cells + id] / Q_Ly[id]; @@ -852,183 +825,193 @@ __global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, // vz_R = Q_Lz[3*n_cells + id] / Q_Lz[id]; // vz_L = Q_Rz[3*n_cells + kmo] / Q_Rz[kmo]; - //Use the reconstructed Velocities instead of neighbor cells centered values - // dev_conserved[(n_fields-1)*n_cells + id] += P * ( dtodx * ( vx_L - vx_R ) + dtody * ( vy_L - vy_R ) + dtodz * ( vz_L - vz_R ) ); - - + // Use the reconstructed Velocities instead of neighbor cells centered + // values + // dev_conserved[(n_fields-1)*n_cells + id] += P * ( dtodx * ( vx_L - vx_R + // ) + dtody * ( vy_L - vy_R ) + dtodz * ( vz_L - vz_R ) ); } } - -__global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_ghost, int n_fields ){ - +__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_ghost, int n_fields) +{ int id, xid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; int imo, ipo; n_cells = nx; + Real eta_1 = DE_ETA_1; Real eta_2 = DE_ETA_2; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; + id = threadIdx.x + blockIdx.x * blockDim.x; xid = id; - imo = max(xid-1, n_ghost); - ipo 
= min(xid+1, nx-n_ghost-1); - + imo = max(xid - 1, n_ghost); + ipo = min(xid + 1, nx - n_ghost - 1); // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - U_advected = dev_conserved[(n_fields-1)*n_cells + id]; - U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz ); - - //find the max nearby total energy - Emax = fmax(dev_conserved[4*n_cells + imo], E); - Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]); - - if (U_total/Emax > eta_2 ) U = U_total; - else U = U_advected; - - //Optional: Avoid Negative Internal Energies - U = fmax(U, (Real) TINY_NUMBER); + d = dev_conserved[id]; + d_inv = 1.0 / d; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + U_advected = dev_conserved[(n_fields - 1) * n_cells + id]; + U_total = E - 0.5 * d * (vx * vx + vy * vy + vz * vz); + + // find the max nearby total energy + Emax = fmax(dev_conserved[4 * n_cells + imo], E); + Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); + + // We only use the "advected" internal energy if both: + // - the thermal energy divided by total energy is a small fraction (smaller than eta_1) + // - AND we aren't masking shock heating (details controlled by Emax & eta_2) + if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) { + U = U_total; + } else { + U = U_advected; + } - //Write Selected internal energy to the GasEnergy array ONLY - //to avoid mixing updated and non-updated values of E - //since the Dual Energy condition depends on the neighbor cells - 
dev_conserved[(n_fields-1)*n_cells + id] = U; + // Optional: Avoid Negative Internal Energies + U = fmax(U, (Real)TINY_NUMBER); + // Write Selected internal energy to the GasEnergy array ONLY + // to avoid mixing updated and non-updated values of E + // since the Dual Energy condition depends on the neighbor cells + dev_conserved[(n_fields - 1) * n_cells + id] = U; } } - -__global__ void Select_Internal_Energy_2D( Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields ){ - +__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields) +{ int id, xid, yid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; int imo, ipo, jmo, jpo; - n_cells = nx*ny; + n_cells = nx * ny; + Real eta_1 = DE_ETA_1; Real eta_2 = DE_ETA_2; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - - imo = max(xid-1, n_ghost); - imo = imo + yid*nx; - ipo = min(xid+1, nx-n_ghost-1); - ipo = ipo + yid*nx; - jmo = max(yid-1, n_ghost); - jmo = xid + jmo*nx; - jpo = min(yid+1, ny-n_ghost-1); - jpo = xid + jpo*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; + + imo = max(xid - 1, n_ghost); + imo = imo + yid * nx; + ipo = min(xid + 1, nx - n_ghost - 1); + ipo = ipo + yid * nx; + jmo = max(yid - 1, n_ghost); + jmo = xid + jmo * nx; + jpo = min(yid + 1, ny - n_ghost - 1); + jpo = xid + jpo * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - 
vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - U_advected = dev_conserved[(n_fields-1)*n_cells + id]; - U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz ); - - //find the max nearby total energy - Emax = fmax(dev_conserved[4*n_cells + imo], E); - Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jmo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jpo]); - - if (U_total/Emax > eta_2 ) U = U_total; - else U = U_advected; - - //Optional: Avoid Negative Internal Energies - U = fmax(U, (Real) TINY_NUMBER); + d = dev_conserved[id]; + d_inv = 1.0 / d; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + U_advected = dev_conserved[(n_fields - 1) * n_cells + id]; + U_total = E - 0.5 * d * (vx * vx + vy * vy + vz * vz); + + // find the max nearby total energy + Emax = fmax(dev_conserved[4 * n_cells + imo], E); + Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]); + + // We only use the "advected" internal energy if both: + // - the thermal energy divided by total energy is a small fraction (smaller than eta_1) + // - AND we aren't masking shock heating (details controlled by Emax & eta_2) + if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) { + U = U_total; + } else { + U = U_advected; + } - //Write Selected internal energy to the GasEnergy array ONLY - //to avoid mixing updated and non-updated values of E - //since the Dual Energy condition depends on the neighbour cells - dev_conserved[(n_fields-1)*n_cells + id] = U; + // Optional: Avoid Negative Internal Energies + U = fmax(U, (Real)TINY_NUMBER); + // Write Selected internal energy to the GasEnergy array ONLY + // to avoid mixing updated and non-updated values of E + // since the Dual Energy condition 
depends on the neighbour cells + dev_conserved[(n_fields - 1) * n_cells + id] = U; } } - -__global__ void Select_Internal_Energy_3D( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields ){ - +__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields) +{ int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, U_total, U_advected, U, Emax; int imo, ipo, jmo, jpo, kmo, kpo; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; + Real eta_1 = DE_ETA_1; Real eta_2 = DE_ETA_2; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; - - imo = max(xid-1, n_ghost); - imo = imo + yid*nx + zid*nx*ny; - ipo = min(xid+1, nx-n_ghost-1); - ipo = ipo + yid*nx + zid*nx*ny; - jmo = max(yid-1, n_ghost); - jmo = xid + jmo*nx + zid*nx*ny; - jpo = min(yid+1, ny-n_ghost-1); - jpo = xid + jpo*nx + zid*nx*ny; - kmo = max(zid-1, n_ghost); - kmo = xid + yid*nx + kmo*nx*ny; - kpo = min(zid+1, nz-n_ghost-1); - kpo = xid + yid*nx + kpo*nx*ny; - + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + + imo = max(xid - 1, n_ghost); + imo = imo + yid * nx + zid * nx * ny; + ipo = min(xid + 1, nx - n_ghost - 1); + ipo = ipo + yid * nx + zid * nx * ny; + jmo = max(yid - 1, n_ghost); + jmo = xid + jmo * nx + zid * nx * ny; + jpo = min(yid + 1, ny - n_ghost - 1); + jpo = xid + jpo * nx + zid * nx * ny; + kmo = max(zid - 1, n_ghost); + kmo = xid + yid * nx + kmo * nx * ny; + kpo = min(zid + 1, nz - n_ghost - 1); + kpo = xid + yid * nx + kpo * nx * ny; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && 
+ zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; - d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - U_advected = dev_conserved[(n_fields-1)*n_cells + id]; - U_total = E - 0.5*d*( vx*vx + vy*vy + vz*vz ); - - //find the max nearby total energy - Emax = fmax(dev_conserved[4*n_cells + imo], E); - Emax = fmax(Emax, dev_conserved[4*n_cells + ipo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jmo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + jpo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + kmo]); - Emax = fmax(Emax, dev_conserved[4*n_cells + kpo]); - - if (U_total/Emax > eta_2 ) U = U_total; - else U = U_advected; - - //Optional: Avoid Negative Internal Energies - U = fmax(U, (Real) TINY_NUMBER); + d = dev_conserved[id]; + d_inv = 1.0 / d; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + U_advected = dev_conserved[(n_fields - 1) * n_cells + id]; + U_total = E - 0.5 * d * (vx * vx + vy * vy + vz * vz); + + // find the max nearby total energy + Emax = fmax(dev_conserved[4 * n_cells + imo], E); + Emax = fmax(Emax, dev_conserved[4 * n_cells + ipo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jmo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + jpo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + kmo]); + Emax = fmax(Emax, dev_conserved[4 * n_cells + kpo]); + + // We only use the "advected" internal energy if both: + // - the thermal energy divided by total energy is a small fraction (smaller than eta_1) + // - AND we aren't masking shock heating (details controlled by Emax & eta_2) + if ((U_total / E > eta_1) or (U_total / Emax > eta_2)) { + U = U_total; + } else { + U = U_advected; + } - //Write Selected 
internal energy to the GasEnergy array ONLY - //to avoid mixing updated and non-updated values of E - //since the Dual Energy condition depends on the neighbour cells - dev_conserved[(n_fields-1)*n_cells + id] = U; + // Optional: Avoid Negative Internal Energies + U = fmax(U, (Real)TINY_NUMBER); + // Write Selected internal energy to the GasEnergy array ONLY + // to avoid mixing updated and non-updated values of E + // since the Dual Energy condition depends on the neighbour cells + dev_conserved[(n_fields - 1) * n_cells + id] = U; } } @@ -1039,178 +1022,286 @@ __global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real n_cells = nx; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; + id = threadIdx.x + blockIdx.x * blockDim.x; xid = id; - // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - U = dev_conserved[(n_fields-1)*n_cells + id]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + U = dev_conserved[(n_fields - 1) * n_cells + id]; - //Use the previously selected Internal Energy to update the total energy - dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U; + // Use the previously selected Internal Energy to update the total energy + dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } - } - __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields) { int id, xid, yid, n_cells; Real d, d_inv, vx, vy, vz, U; - n_cells = nx*ny; + n_cells = nx * ny; // get 
a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; - + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - U = dev_conserved[(n_fields-1)*n_cells + id]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + U = dev_conserved[(n_fields - 1) * n_cells + id]; - //Use the previously selected Internal Energy to update the total energy - dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U; + // Use the previously selected Internal Energy to update the total energy + dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } - } - __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields) { - //Called in a separate kernel to avoid interfering with energy selection in Select_Internal_Energy + // Called in a separate kernel to avoid interfering with energy selection in + // Select_Internal_Energy int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, U; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; + id = threadIdx.x + blockIdx.x * 
blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { // every thread collects the conserved variables it needs from global memory - d = dev_conserved[ id]; + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - U = dev_conserved[(n_fields-1)*n_cells + id]; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + U = dev_conserved[(n_fields - 1) * n_cells + id]; - //Use the previously selected Internal Energy to update the total energy - dev_conserved[4*n_cells + id] = 0.5*d*( vx*vx + vy*vy + vz*vz ) + U; + // Use the previously selected Internal Energy to update the total energy + dev_conserved[4 * n_cells + id] = 0.5 * d * (vx * vx + vy * vy + vz * vz) + U; } } +#endif // DE -#endif //DE +void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor) +{ + // set values for GPU kernels + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; + // number of blocks per 1D grid + dim3 dim1dGrid(ngrid, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB, 1, 1); + + hipLaunchKernelGGL(Temperature_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields, U_floor); +} -#ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor ) +__global__ void Temperature_Floor_Kernel(Real *dev_conserved, 
int nx, int ny, int nz, int n_ghost, int n_fields, + Real U_floor) { int id, xid, yid, zid, n_cells; Real d, d_inv, vx, vy, vz, E, Ekin, U; - n_cells = nx*ny*nz; + n_cells = nx * ny * nz; // get a global thread ID - id = threadIdx.x + blockIdx.x * blockDim.x; - zid = id / (nx*ny); - yid = (id - zid*nx*ny) / nx; - xid = id - zid*nx*ny - yid*nx; - + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; // threads corresponding to real cells do the calculation - if (xid > n_ghost-1 && xid < nx-n_ghost && yid > n_ghost-1 && yid < ny-n_ghost && zid > n_ghost-1 && zid < nz-n_ghost) - { - d = dev_conserved[ id]; + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - E = dev_conserved[4*n_cells + id]; - Ekin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - - U = ( E - Ekin ) / d; - if ( U < U_floor ) dev_conserved[4*n_cells + id] = Ekin + d*U_floor; + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + E = dev_conserved[4 * n_cells + id]; + Ekin = 0.5 * d * (vx * vx + vy * vy + vz * vz); + + U = (E - Ekin) / d; + if (U < U_floor) { + dev_conserved[4 * n_cells + id] = Ekin + d * U_floor; + } - #ifdef DE - U = dev_conserved[(n_fields-1)*n_cells + id] / d ; - if ( U < U_floor ) dev_conserved[(n_fields-1)*n_cells + id] = d*U_floor ; - #endif +#ifdef DE + U = dev_conserved[(n_fields - 1) * n_cells + id] / d; + if (U < U_floor) { + dev_conserved[(n_fields - 1) * n_cells + id] = d * U_floor; + } +#endif } } -#endif //TEMPERATURE_FLOOR - -__device__ Real Average_Cell_Single_Field( int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, 
Real *conserved ){ +__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, + Real *conserved) +{ Real v_l, v_r, v_d, v_u, v_b, v_t, v_avrg; int id; - id = (i-1) + (j)*nx + (k)*nx*ny; - v_l = conserved[ field_indx*ncells + id ]; - id = (i+1) + (j)*nx + (k)*nx*ny; - v_r = conserved[ field_indx*ncells + id ]; - id = (i) + (j-1)*nx + (k)*nx*ny; - v_d = conserved[ field_indx*ncells + id ]; - id = (i) + (j+1)*nx + (k)*nx*ny; - v_u = conserved[ field_indx*ncells + id ]; - id = (i) + (j)*nx + (k-1)*nx*ny; - v_b = conserved[ field_indx*ncells + id ]; - id = (i) + (j)*nx + (k+1)*nx*ny; - v_t = conserved[ field_indx*ncells + id ]; - v_avrg = ( v_l + v_r + v_d + v_u + v_b + v_t ) / 6; - id = (i) + (j)*nx + (k)*nx*ny; - conserved[ field_indx*ncells + id ] = v_avrg; + id = (i - 1) + (j)*nx + (k)*nx * ny; + v_l = conserved[field_indx * ncells + id]; + id = (i + 1) + (j)*nx + (k)*nx * ny; + v_r = conserved[field_indx * ncells + id]; + id = (i) + (j - 1) * nx + (k)*nx * ny; + v_d = conserved[field_indx * ncells + id]; + id = (i) + (j + 1) * nx + (k)*nx * ny; + v_u = conserved[field_indx * ncells + id]; + id = (i) + (j)*nx + (k - 1) * nx * ny; + v_b = conserved[field_indx * ncells + id]; + id = (i) + (j)*nx + (k + 1) * nx * ny; + v_t = conserved[field_indx * ncells + id]; + v_avrg = (v_l + v_r + v_d + v_u + v_b + v_t) / 6; + id = (i) + (j)*nx + (k)*nx * ny; + conserved[field_indx * ncells + id] = v_avrg; return v_avrg; +} + +__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, + Real gamma, Real *conserved) +{ + int id = i + (j)*nx + (k)*nx * ny; + + Real d, mx, my, mz, E, P; + d = conserved[grid_enum::density * ncells + id]; + mx = conserved[grid_enum::momentum_x * ncells + id]; + my = conserved[grid_enum::momentum_y * ncells + id]; + mz = conserved[grid_enum::momentum_z * ncells + id]; + E = conserved[grid_enum::Energy * ncells + id]; + P = (E - (0.5 / d) * (mx * 
mx + my * my + mz * mz)) * (gamma - 1.0); + + printf("%3d %3d %3d BC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, mx / d, my / d, mz / d); + + int idn; + int N = 0; + Real d_av, vx_av, vy_av, vz_av, P_av; + d_av = vx_av = vy_av = vz_av = P_av = 0.0; +#ifdef SCALAR + Real scalar[NSCALARS], scalar_av[NSCALARS]; + for (int n = 0; n < NSCALARS; n++) { // NOLINT + scalar_av[n] = 0.0; + } +#endif + + for (int kk = k - 1; kk <= k + 1; kk++) { + for (int jj = j - 1; jj <= j + 1; jj++) { + for (int ii = i - 1; ii <= i + 1; ii++) { + idn = ii + jj * nx + kk * nx * ny; + d = conserved[grid_enum::density * ncells + idn]; + mx = conserved[grid_enum::momentum_x * ncells + idn]; + my = conserved[grid_enum::momentum_y * ncells + idn]; + mz = conserved[grid_enum::momentum_z * ncells + idn]; + P = (conserved[grid_enum::Energy * ncells + idn] - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); +#ifdef SCALAR + for (int n = 0; n < NSCALARS; n++) { // NOLINT + scalar[n] = conserved[grid_enum::scalar * ncells + idn]; + } +#endif + if (d > 0.0 && P > 0.0) { + d_av += d; + vx_av += mx; + vy_av += my; + vz_av += mz; + P_av += P / (gamma - 1.0); +#ifdef SCALAR + for (int n = 0; n < NSCALARS; n++) { // NOLINT + scalar_av[n] += scalar[n]; + } +#endif + N++; + } + } + } + } + P_av = P_av / N; + vx_av = vx_av / d_av; + vy_av = vy_av / d_av; + vz_av = vz_av / d_av; +#ifdef SCALAR + for (int n = 0; n < NSCALARS; n++) { // NOLINT + scalar_av[n] = scalar_av[n] / d_av; + } +#endif + d_av = d_av / N; + + // replace cell values with new averaged values + conserved[id + ncells * grid_enum::density] = d_av; + conserved[id + ncells * grid_enum::momentum_x] = d_av * vx_av; + conserved[id + ncells * grid_enum::momentum_y] = d_av * vy_av; + conserved[id + ncells * grid_enum::momentum_z] = d_av * vz_av; + conserved[id + ncells * grid_enum::Energy] = + P_av / (gamma - 1.0) + 0.5 * d_av * (vx_av * vx_av + vy_av * vy_av + vz_av * vz_av); +#ifdef DE + conserved[id + ncells * 
grid_enum::GasEnergy] = P_av / (gamma - 1.0); +#endif +#ifdef SCALAR + for (int n = 0; n < NSCALARS; n++) { // NOLINT + conserved[id + ncells * grid_enum::scalar] = d_av * scalar_av[n]; + } +#endif + + d = d_av; + E = P_av / (gamma - 1.0) + 0.5 * d_av * (vx_av * vx_av + vy_av * vy_av + vz_av * vz_av); + P = P_av; + + printf("%3d %3d %3d FC: d: %e E:%e P:%e vx:%e vy:%e vz:%e\n", i, j, k, d, E, P, vx_av, vy_av, vz_av); } -__device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved ){ - - // Average Density - Average_Cell_Single_Field( 0, i, j, k, nx, ny, nz, ncells, conserved ); - // Average Momentum_x - Average_Cell_Single_Field( 1, i, j, k, nx, ny, nz, ncells, conserved ); - // Average Momentum_y - Average_Cell_Single_Field( 2, i, j, k, nx, ny, nz, ncells, conserved ); - // Average Momentum_z - Average_Cell_Single_Field( 3, i, j, k, nx, ny, nz, ncells, conserved ); - // Average Energy - Average_Cell_Single_Field( 4, i, j, k, nx, ny, nz, ncells, conserved ); - #ifdef MHD - // Average MHD - Average_Cell_Single_Field( 5+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 5+NSCALARS, i-1, j, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 6+NSCALARS, i, j-1, k, nx, ny, nz, ncells, conserved ); - Average_Cell_Single_Field( 7+NSCALARS, i, j, k-1, nx, ny, nz, ncells, conserved ); - #endif //MHD - #ifdef DE - // Average GasEnergy - Average_Cell_Single_Field( n_fields-1, i, j, k, nx, ny, nz, ncells, conserved ); - #endif //DE +void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor) +{ + // set values for GPU kernels + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; + // number of blocks per 1D grid + dim3 
dim1dGrid(ngrid, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB, 1, 1); + + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, field_num, + scalar_floor); } +__global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real scalar_floor) +{ + int id, xid, yid, zid, n_cells; + Real scalar; // variable to store the value of the scalar before a floor is applied + n_cells = nx * ny * nz; -#endif //CUDA + // get a global thread ID + id = threadIdx.x + blockIdx.x * blockDim.x; + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; + + // threads corresponding to real cells do the calculation + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { + scalar = dev_conserved[id + n_cells * field_num]; + + if (scalar < scalar_floor) { + // printf("###Thread scalar change %f -> %f \n", scalar, scalar_floor); + dev_conserved[id + n_cells * field_num] = scalar_floor; + } + } +} diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index c801882d3..f167d0745 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -1,21 +1,25 @@ /*! \file hydro_cuda.h * \brief Declarations of functions used in all cuda integration algorithms. 
*/ -#ifdef CUDA #ifndef HYDRO_CUDA_H #define HYDRO_CUDA_H #include "../global/global.h" #include "../utils/mhd_utilities.h" -__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, Real dx, Real xbound, Real dt, Real gamma, int n_fields); +__global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, + Real dx, Real xbound, Real dt, Real gamma, int n_fields, int custom_grav); +__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, + int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, + Real ybound, Real dt, Real gamma, int n_fields, int custom_grav); -__global__ void Update_Conserved_Variables_2D(Real *dev_conserved, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, Real gamma, int n_fields); - - -__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, Real gamma, int n_fields, Real density_floor, Real *dev_potential ); - +__global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, + Real *Q_Lz, Real *Q_Rz, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, + int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, + Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, Real dt, + Real gamma, int n_fields, int custom_grav, Real density_floor, + Real *dev_potential); /*! 
* \brief Determine the maximum inverse crossing time in a specific cell @@ -32,16 +36,9 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R * \param[in] gamma The adiabatic index * \return Real The maximum inverse crossing time in the cell */ -__device__ __host__ Real hydroInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &dx, - Real const &dy, - Real const &dz, - Real const &gamma); +__device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &dx, Real const &dy, + Real const &dz, Real const &gamma); /*! * \brief Determine the maximum inverse crossing time in a specific cell @@ -61,59 +58,64 @@ __device__ __host__ Real hydroInverseCrossingTime(Real const &E, * \param[in] gamma The adiabatic index * \return Real The maximum inverse crossing time in the cell */ -__device__ __host__ Real mhdInverseCrossingTime(Real const &E, - Real const &d, - Real const &d_inv, - Real const &vx, - Real const &vy, - Real const &vz, - Real const &avgBx, - Real const &avgBy, - Real const &avgBz, - Real const &dx, - Real const &dy, - Real const &dz, +__device__ __host__ Real mhdInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, + Real const &vy, Real const &vz, Real const &avgBx, Real const &avgBy, + Real const &avgBz, Real const &dx, Real const &dy, Real const &dz, Real const &gamma); -__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz); +__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, + int nz, Real dx, Real dy, Real dz); -Real Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma ); +Real 
Calc_dt_GPU(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, + Real gamma); __global__ void Sync_Energies_1D(Real *dev_conserved, int nx, int n_ghost, Real gamma, int n_fields); - __global__ void Sync_Energies_2D(Real *dev_conserved, int nx, int ny, int n_ghost, Real gamma, int n_fields); - __global__ void Sync_Energies_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, Real gamma, int n_fields); #ifdef AVERAGE_SLOW_CELLS -void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ); +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, + Real dz, Real gamma, Real max_dti_slow); -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, Real dz, Real gamma, Real max_dti_slow ); +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, + Real dy, Real dz, Real gamma, Real max_dti_slow); #endif -#ifdef TEMPERATURE_FLOOR -__global__ void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor ); -#endif +void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); + +__global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real U_floor); + +void Apply_Scalar_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, Real scalar_floor); -__global__ void Partial_Update_Advected_Internal_Energy_1D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, int n_ghost, Real dx, Real dt, Real gamma, int n_fields ); +__global__ void Scalar_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int field_num, + Real scalar_floor); 
-__global__ void Partial_Update_Advected_Internal_Energy_2D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields ); +__global__ void Partial_Update_Advected_Internal_Energy_1D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, int nx, + int n_ghost, Real dx, Real dt, Real gamma, int n_fields); -__global__ void Partial_Update_Advected_Internal_Energy_3D( Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields ); +__global__ void Partial_Update_Advected_Internal_Energy_2D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields); -__global__ void Select_Internal_Energy_1D( Real *dev_conserved, int nx, int n_ghost, int n_fields ); +__global__ void Partial_Update_Advected_Internal_Energy_3D(Real *dev_conserved, Real *Q_Lx, Real *Q_Rx, Real *Q_Ly, + Real *Q_Ry, Real *Q_Lz, Real *Q_Rz, int nx, int ny, int nz, + int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, + int n_fields); -__global__ void Select_Internal_Energy_2D( Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields ); +__global__ void Select_Internal_Energy_1D(Real *dev_conserved, int nx, int n_ghost, int n_fields); -__global__ void Select_Internal_Energy_3D( Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields ); +__global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, int n_ghost, int n_fields); -__device__ void Average_Cell_All_Fields( int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, Real *conserved ); +__global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields); -__device__ Real Average_Cell_Single_Field( int field_indx, int i, int j, int k, int nx, int ny, 
int nz, int ncells, Real *conserved ); +__device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, + Real gamma, Real *conserved); +__device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, + Real *conserved); -#endif //HYDRO_CUDA_H -#endif //CUDA +#endif // HYDRO_CUDA_H diff --git a/src/hydro/hydro_cuda_tests.cu b/src/hydro/hydro_cuda_tests.cu index a6d00e96b..c289f1551 100644 --- a/src/hydro/hydro_cuda_tests.cu +++ b/src/hydro/hydro_cuda_tests.cu @@ -1,90 +1,80 @@ /*! -* \file hydro_cuda_tests.cu -* \author Evan Schneider (evs34@pitt.edu) -* \brief Test the code units within hydro_cuda.cu -* -*/ + * \file hydro_cuda_tests.cu + * \author Evan Schneider (evs34@pitt.edu) + * \brief Test the code units within hydro_cuda.cu + * + */ // STL Includes +#include + #include -#include #include -#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" +#include "../hydro/hydro_cuda.h" // Include code to test +#include "../utils/DeviceVector.h" #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../hydro/hydro_cuda.h" // Include code to test - -#if defined(CUDA) // ============================================================================= // Tests for the Calc_dt_GPU function // ============================================================================= TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) { - - Real* testDt; - cudaHostAlloc(&testDt, sizeof(Real), cudaHostAllocDefault); - // Call the function we are testing int num_blocks = 1; dim3 dim1dGrid(num_blocks, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - int const nx = 1; - int const ny = 1; - int const nz = 1; - int const n_fields = 5; // Total number of conserved fields - int const n_ghost = 0; - 
Real dx = 1.0; - Real dy = 1.0; - Real dz = 1.0; - Real *host_conserved; - Real *dev_conserved; - Real *dev_dti_array; - Real gamma = 5.0/3.0; - - // Allocate host and device arrays and copy data - cudaHostAlloc(&host_conserved, n_fields*sizeof(Real), cudaHostAllocDefault); - CudaSafeCall(cudaMalloc(&dev_conserved, n_fields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&dev_dti_array, sizeof(Real))); + int const nx = 1; + int const ny = 1; + int const nz = 1; + int const n_fields = 5; // Total number of conserved fields + int const n_ghost = 0; + Real dx = 1.0; + Real dy = 1.0; + Real dz = 1.0; + std::vector host_conserved(n_fields); + cuda_utilities::DeviceVector dev_conserved(n_fields); + cuda_utilities::DeviceVector dev_dti(1); + Real gamma = 5.0 / 3.0; // Set values of conserved variables for input (host) - host_conserved[0] = 1.0; // density - host_conserved[1] = 0.0; // x momentum - host_conserved[2] = 0.0; // y momentum - host_conserved[3] = 0.0; // z momentum - host_conserved[4] = 1.0; // Energy + host_conserved.at(0) = 1.0; // density + host_conserved.at(1) = 0.0; // x momentum + host_conserved.at(2) = 0.0; // y momentum + host_conserved.at(3) = 0.0; // z momentum + host_conserved.at(4) = 1.0; // Energy // Copy host data to device arrray - CudaSafeCall(cudaMemcpy(dev_conserved, host_conserved, n_fields*sizeof(Real), cudaMemcpyHostToDevice)); - //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real dz) + dev_conserved.cpyHostToDevice(host_conserved); + //__global__ void Calc_dt_3D(Real *dev_conserved, Real *dev_dti, Real gamma, + // int n_ghost, int n_fields, int nx, int ny, int nz, Real dx, Real dy, Real + // dz) // Run the kernel - hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_dti_array, gamma, n_ghost, n_fields, nx, ny, nz, dx, dy, dz); - CudaCheckError(); - - // Copy the dt value back from the GPU - CudaSafeCall(cudaMemcpy(testDt, 
dev_dti_array, sizeof(Real), cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(Calc_dt_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), dev_dti.data(), gamma, n_ghost, + n_fields, nx, ny, nz, dx, dy, dz); + GPU_Error_Check(); // Compare results // Check for equality and if not equal return difference - double fiducialDt = 1.0540925533894598; - double testData = testDt[0]; + double const fiducialDt = 1.0540925533894598; + double const testData = dev_dti.at(0); double absoluteDiff; int64_t ulpsDiff; bool areEqual; - areEqual = testingUtilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff); - EXPECT_TRUE(areEqual) - << "The fiducial value is: " << fiducialDt << std::endl - << "The test value is: " << testData << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; + areEqual = testing_utilities::nearlyEqualDbl(fiducialDt, testData, absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) << "The fiducial value is: " << fiducialDt << std::endl + << "The test value is: " << testData << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } // ============================================================================= // End of tests for the Calc_dt_GPU function @@ -93,37 +83,28 @@ TEST(tHYDROCalcDt3D, CorrectInputExpectCorrectOutput) // ============================================================================= // Tests for the hydroInverseCrossingTime function // ============================================================================= -TEST(tHYDROHydroInverseCrossingTime, - CorrectInputExpectCorrectOutput) +TEST(tHYDROHydroInverseCrossingTime, CorrectInputExpectCorrectOutput) { -// Set test values -double const energy = 7.6976906577e2; -double const density = 1.6756968986; -double const velocityX = 7.0829278656; -double const velocityY = 5.9283073464; -double const velocityZ = 8.8417748226; 
-double const cellSizeX = 8.1019429453e2; -double const cellSizeY = 7.1254780684e2; -double const cellSizeZ = 7.5676716066e2; -double const gamma = 5./3.; - -// Fiducial Values -double const fiducialInverseCrossingTime = 0.038751126881804446; - -// Function to test -double testInverseCrossingTime = hydroInverseCrossingTime(energy, - density, - 1./density, - velocityX, - velocityY, - velocityZ, - cellSizeX, - cellSizeY, - cellSizeZ, - gamma); - -// Check results -testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + // Set test values + double const energy = 7.6976906577e2; + double const density = 1.6756968986; + double const velocityX = 7.0829278656; + double const velocityY = 5.9283073464; + double const velocityZ = 8.8417748226; + double const cellSizeX = 8.1019429453e2; + double const cellSizeY = 7.1254780684e2; + double const cellSizeZ = 7.5676716066e2; + double const gamma = 5. / 3.; + + // Fiducial Values + double const fiducialInverseCrossingTime = 0.038751126881804446; + + // Function to test + double testInverseCrossingTime = hydroInverseCrossingTime(energy, density, 1. 
/ density, velocityX, velocityY, + velocityZ, cellSizeX, cellSizeY, cellSizeZ, gamma); + + // Check results + testing_utilities::Check_Results(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the hydroInverseCrossingTime function @@ -132,8 +113,7 @@ testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingT // ============================================================================= // Tests for the mhdInverseCrossingTime function // ============================================================================= -TEST(tMHDMhdInverseCrossingTime, - CorrectInputExpectCorrectOutput) +TEST(tMHDMhdInverseCrossingTime, CorrectInputExpectCorrectOutput) { // Set test values double const energy = 7.6976906577e2; @@ -147,32 +127,66 @@ TEST(tMHDMhdInverseCrossingTime, double const cellSizeX = 8.1019429453e2; double const cellSizeY = 7.1254780684e2; double const cellSizeZ = 7.5676716066e2; - double const gamma = 5./3.; + double const gamma = 5. / 3.; // Fiducial Values double const fiducialInverseCrossingTime = 0.038688028391959103; // Function to test - double testInverseCrossingTime = mhdInverseCrossingTime(energy, - density, - 1./density, - velocityX, - velocityY, - velocityZ, - magneticX, - magneticY, - magneticZ, - cellSizeX, - cellSizeY, - cellSizeZ, - gamma); - + double testInverseCrossingTime = + mhdInverseCrossingTime(energy, density, 1. 
/ density, velocityX, velocityY, velocityZ, magneticX, magneticY, + magneticZ, cellSizeX, cellSizeY, cellSizeZ, gamma); // Check results - testingUtilities::checkResults(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); + testing_utilities::Check_Results(fiducialInverseCrossingTime, testInverseCrossingTime, "inverse crossing time"); } // ============================================================================= // End of tests for the mhdInverseCrossingTime function // ============================================================================= -#endif // CUDA +TEST(tHYDROScalarFloor, CorrectInputExpectCorrectOutput) +{ + int num_blocks = 1; + dim3 dim1dGrid(num_blocks, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + int const nx = 1; + int const ny = 1; + int const nz = 1; + int const n_fields = 6; // 5 conserved + 1 scalar + int const n_ghost = 0; + int const field_num = 5; // scalar field index + + // initialize host and device conserved arrays + std::vector host_conserved(n_fields); + cuda_utilities::DeviceVector dev_conserved(n_fields); + + // Set values of conserved variables for input (host) + host_conserved.at(0) = 0.0; // density + host_conserved.at(1) = 0.0; // x momentum + host_conserved.at(2) = 0.0; // y momentum + host_conserved.at(3) = 0.0; // z momentum + host_conserved.at(4) = 0.0; // energy + + Real scalar_floor = 1.0; // minimum allowed value for scalar field + + // Case where scalar is below the floor + host_conserved.at(field_num) = 0.0; // scalar + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + field_num, scalar_floor); + testing_utilities::Check_Results(scalar_floor, dev_conserved.at(field_num), "below floor"); + + // Case where scalar is above the floor + host_conserved.at(field_num) = 2.0; // scalar + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, 
dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + field_num, scalar_floor); + testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "above floor"); + + // Case where scalar is at the floor + host_conserved.at(field_num) = 1.0; // scalar + dev_conserved.cpyHostToDevice(host_conserved); + hipLaunchKernelGGL(Scalar_Floor_Kernel, dim1dGrid, dim1dBlock, 0, 0, dev_conserved.data(), nx, ny, nz, n_ghost, + field_num, scalar_floor); + testing_utilities::Check_Results(host_conserved.at(field_num), dev_conserved.at(field_num), "at floor"); +} \ No newline at end of file diff --git a/src/integrators/VL_1D_cuda.cu b/src/integrators/VL_1D_cuda.cu index 0eaecc899..f2ad520b8 100644 --- a/src/integrators/VL_1D_cuda.cu +++ b/src/integrators/VL_1D_cuda.cu @@ -1,153 +1,165 @@ /*! \file VL_1D_cuda.cu * \brief Definitions of the cuda VL algorithm functions. */ -#ifdef CUDA #ifdef VL -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/VL_1D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../utils/error_handling.h" -#include "../io/io.h" - + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_1D_cuda.h" + #include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include 
"../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/error_handling.h" + #include "../utils/gpu.hpp" __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, - int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields); - + int n_cells, int n_ghost, Real dx, Real dt, Real gamma, + int n_fields); - -void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields) +void VL_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields, int custom_grav) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid - int n_cells = nx; - int ny = 1; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx; + [[maybe_unused]] int ny = 1; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); dim3 dimBlock(TPB, 1, 1); - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. 
+ // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } - // Step 1: Use PCM reconstruction to put conserved variables into interface arrays - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); - CudaCheckError(); + // Step 1: Use PCM reconstruction to put conserved variables into interface + // arrays + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, + n_fields); + GPU_Error_Check(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, 
dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif - CudaCheckError(); - + GPU_Error_Check(); // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, n_cells, n_ghost, dx, 0.5*dt, gama, n_fields); - CudaCheckError(); - + hipLaunchKernelGGL(Update_Conserved_Variables_1D_half, dimGrid, dimBlock, 0, 0, dev_conserved, dev_conserved_half, + F_x, n_cells, n_ghost, dx, 0.5 * dt, gama, n_fields); + GPU_Error_Check(); - // Step 4: Construct left and right interface values using updated conserved variables + // Step 4: Construct left and right interface values using updated conserved + // variables #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, n_ghost, gama, + n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); #endif #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, 
dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMC_VL, dimGrid, dimBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); #endif - CudaCheckError(); - + GPU_Error_Check(); // Step 5: Calculate the fluxes again #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); #ifdef DE - // Compute the divergence of velocity before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields ); + // Compute the divergence of velocity before updating the conserved array, + // this solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, + n_ghost, dx, dt, gama, n_fields); #endif - // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, 
x_off, n_ghost, dx, xbound, dt, gama, n_fields); - CudaCheckError(); - + hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, + n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); + GPU_Error_Check(); #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif return; - - } -void Free_Memory_VL_1D() { - +void Free_Memory_VL_1D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(dev_conserved_half); cudaFree(Q_Lx); cudaFree(Q_Rx); cudaFree(F_x); - } -__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F, + int n_cells, int n_ghost, Real dx, Real dt, Real gamma, int n_fields) { int id, imo; - Real dtodx = dt/dx; + Real dtodx = dt / dx; // get a global thread ID id = threadIdx.x + blockIdx.x * blockDim.x; @@ -158,47 +170,48 @@ __global__ void Update_Conserved_Variables_1D_half(Real *dev_conserved, Real *de int ipo; #endif - // threads corresponding all cells except outer ring of ghost cells do the calculation - if (id > 0 && id < n_cells-1) - { - imo = id-1; - #ifdef DE - d = dev_conserved[ id]; + // threads corresponding all cells except outer ring of ghost cells do the + // calculation + if (id > 0 && id < n_cells - 1) { + imo = id - 1; + #ifdef DE + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - //if (d < 0.0 || d != d) 
printf("Negative density before half step update.\n"); - //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id); - ipo = id+1; - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + // if (d < 0.0 || d != d) printf("Negative density before half step + // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step + // update.\n", id); + ipo = id + 1; + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + #endif // update the conserved variable array - dev_conserved_half[ id] = dev_conserved[ id] + dtodx * (dev_F[ imo] - dev_F[ id]); - dev_conserved_half[ n_cells + id] = dev_conserved[ n_cells + id] + dtodx * (dev_F[ n_cells + imo] - dev_F[ n_cells + id]); - dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] + dtodx * (dev_F[2*n_cells + imo] - dev_F[2*n_cells + id]); - dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] + dtodx * (dev_F[3*n_cells + imo] - dev_F[3*n_cells + id]); - dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id] + dtodx * (dev_F[4*n_cells + imo] - dev_F[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/VL_2D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" 
-#include "../riemann_solvers/hllc_cuda.h" - - -__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, - Real *dev_F_x, Real *dev_F_y, int nx, int ny, - int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields); - - -void VL_Algorithm_2D_CUDA ( Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, - Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields) + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_2D_cuda.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + +__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields); + +void VL_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav) { + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - - int n_cells = nx*ny; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid dim3 dim2dGrid(ngrid, 1, 1); - //number of threads per 1D block + // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); 
- - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate GPU arrays - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); + // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; - CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. 
memory_allocated = true; } - // Step 1: Use PCM reconstruction to put conserved variables into interface arrays - hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields); - CudaCheckError(); - + // Step 1: Use PCM reconstruction to put conserved variables into interface + // arrays + hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, + n_ghost, gama, n_fields); + GPU_Error_Check(); // Step 2: Calculate first-order upwind fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, 
n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif - CudaCheckError(); - + GPU_Error_Check(); // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5*dt, gama, n_fields); - CudaCheckError(); - + hipLaunchKernelGGL(Update_Conserved_Variables_2D_half, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, + F_x, F_y, nx, ny, n_ghost, dx, dy, 0.5 * dt, gama, n_fields); + GPU_Error_Check(); - // Step 4: Construct left and right interface values using updated conserved variables + // Step 4: Construct left and right interface values using updated conserved + // variables #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); #endif #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, + 0, 
n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, + 1, n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif //PPMP + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, + dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, + dt, gama, 1, n_fields); + #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif //PPMC - CudaCheckError(); - + hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, dim2dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); + #endif // PPMC + GPU_Error_Check(); // Step 5: Calculate the fluxes again #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif #ifdef ROE - 
hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); #endif #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); #endif - CudaCheckError(); + GPU_Error_Check(); #ifdef DE - // Compute the divergence of velocity before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields ); + // Compute the divergence of velocity before updating the conserved array, + // this solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields); #endif - // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 
0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); - CudaCheckError(); - + hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, + y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); + GPU_Error_Check(); #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif return; - } -void Free_Memory_VL_2D() { - +void Free_Memory_VL_2D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(dev_conserved_half); @@ -156,25 +173,25 @@ void Free_Memory_VL_2D() { cudaFree(Q_Ry); cudaFree(F_x); cudaFree(F_y); - } - -__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, Real dt, Real gamma, int n_fields) +__global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, int nx, int ny, int n_ghost, Real dx, Real dy, + Real dt, Real gamma, int n_fields) { int id, xid, yid, n_cells; int imo, jmo; - Real dtodx = dt/dx; - Real dtody = dt/dy; + Real dtodx = dt / dx; + Real dtody = dt / dy; - n_cells = nx*ny; + n_cells = nx * ny; // get a global thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - id = threadIdx.x + blockId * blockDim.x; - yid = id / nx; - xid = id - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + id = threadIdx.x + blockId * blockDim.x; + yid = id / nx; + xid = id - yid * nx; #ifdef DE Real d, d_inv, vx, vy, vz; @@ -182,63 +199,58 @@ __global__ void Update_Conserved_Variables_2D_half(Real *dev_conserved, Real *de int ipo, jpo; #endif - // all threads but one outer ring of ghost cells - if 
(xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1) - { - imo = xid-1 + yid*nx; - jmo = xid + (yid-1)*nx; - #ifdef DE - d = dev_conserved[ id]; + if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1) { + imo = xid - 1 + yid * nx; + jmo = xid + (yid - 1) * nx; + #ifdef DE + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - //if (d < 0.0 || d != d) printf("Negative density before half step update.\n"); - //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id); - ipo = xid+1 + yid*nx; - jpo = xid + (yid+1)*nx; - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + P = (dev_conserved[4 * n_cells + id] - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); + // if (d < 0.0 || d != d) printf("Negative density before half step + // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step + // update.\n", id); + ipo = xid + 1 + yid * nx; + jpo = xid + (yid + 1) * nx; + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; + #endif // update the conserved variable array - dev_conserved_half[ id] = dev_conserved[ id] - + dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]); - dev_conserved_half[ n_cells + id] = dev_conserved[ n_cells + id] - 
+ dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]); - dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] - + dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]); - dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] - + dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]); - dev_conserved_half[4*n_cells + id] = dev_conserved[4*n_cells + id] - + dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i -#include -#include -#include "../utils/gpu.hpp" -#include "../utils/hydro_utilities.h" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../integrators/VL_3D_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../io/io.h" -#include "../riemann_solvers/hll_cuda.h" - -__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor); - - + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/VL_3D_cuda.h" + #include "../io/io.h" + #include "../mhd/ct_electric_fields.h" + #include "../mhd/magnetic_update.h" + #include "../reconstruction/pcm_cuda.h" + #include 
"../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hll_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/hlld_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + #include "../utils/hydro_utilities.h" + +__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, + Real density_floor); void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, - int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ) + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential) { + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid @@ -47,155 +52,315 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - //host_grav_potential is NULL if not using GRAVITY + // host_grav_potential is NULL if not using GRAVITY temp_potential = host_grav_potential; - if ( !memory_allocated ){ - + if 
(!memory_allocated) { // allocate memory on the GPU - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); dev_conserved = d_conserved; - CudaSafeCall( cudaMalloc((void**)&dev_conserved_half, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_z, n_fields*n_cells*sizeof(Real)) ); - - #if defined( GRAVITY ) - // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) ); + + // Set the size of the interface and flux arrays + #ifdef MHD + // In MHD/Constrained Transport the interface arrays have one fewer fields + // since the magnetic field that is stored on the face does not require + // reconstructions. Similarly the fluxes have one fewer fields since the + // magnetic field on that face doesn't have an associated flux. Each + // interface array stores the magnetic fields on that interface that are + // not perpendicular to the interface and arranged cyclically. I.e. the + // `Q_Lx` interface stores the reconstructed Y and Z magnetic fields in + // that order, the `Q_Ly` interface stores the Z and X magnetic fields in + // that order, and the `Q_Lz` interface stores the X and Y magnetic fields + // in that order. These fields can be indexed with the Q_?_dir grid_enums.
+ // The interface state arrays store in the interface on the "right" side of + // the cell, so the flux arrays store the fluxes through the right interface + // + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the face + // centered electric fields/EMF. -cross(V,B)x is the negative of the + // x-component of V cross B. Note that "X" is the direction the solver is + // running in this case, not necessarily the true "X". + // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Z F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_X F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Y F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_X + size_t const arraySize = (n_fields - 1) * n_cells * sizeof(Real); + size_t const ctArraySize = 3 * n_cells * sizeof(Real); + #else // not MHD + size_t const arraySize = n_fields * n_cells * sizeof(Real); + #endif // MHD + GPU_Error_Check(cudaMalloc((void **)&dev_conserved_half, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Lz, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&Q_Rz, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&F_x, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&F_y, arraySize)); + GPU_Error_Check(cudaMalloc((void **)&F_z, arraySize)); + + cuda_utilities::initGpuMemory(dev_conserved_half, n_fields * n_cells * sizeof(Real)); + 
cuda_utilities::initGpuMemory(Q_Lx, arraySize); + cuda_utilities::initGpuMemory(Q_Rx, arraySize); + cuda_utilities::initGpuMemory(Q_Ly, arraySize); + cuda_utilities::initGpuMemory(Q_Ry, arraySize); + cuda_utilities::initGpuMemory(Q_Lz, arraySize); + cuda_utilities::initGpuMemory(Q_Rz, arraySize); + cuda_utilities::initGpuMemory(F_x, arraySize); + cuda_utilities::initGpuMemory(F_y, arraySize); + cuda_utilities::initGpuMemory(F_z, arraySize); + + #ifdef MHD + GPU_Error_Check(cudaMalloc((void **)&ctElectricFields, ctArraySize)); + #endif // MHD + + #if defined(GRAVITY) dev_grav_potential = d_grav_potential; - #else + #else // not GRAVITY dev_grav_potential = NULL; - #endif + #endif // GRAVITY - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. 
memory_allocated = true; - } - #if defined( GRAVITY ) && !defined( GRAVITY_GPU ) - CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); - #endif - - - // Step 1: Use PCM reconstruction to put primitive variables into interface arrays - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); - - - // Step 2: Calculate first-order upwind fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //EXACT - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //ROE - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - 
hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLL - CudaCheckError(); - - - // Step 3: Update the conserved variables half a timestep - hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, dev_conserved_half, F_x, F_y, F_z, nx, ny, nz, n_ghost, dx, dy, dz, 0.5*dt, gama, n_fields, density_floor ); - CudaCheckError(); - - - // Step 4: Construct left and right interface values using updated conserved variables - #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); - #endif - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PLMP - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 
0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PPMP - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PPMC - CudaCheckError(); - - - // Step 5: Calculate the fluxes again - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //EXACT - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //ROE - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 
0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC - CudaCheckError(); - - #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); - CudaCheckError(); - #endif + #if defined(GRAVITY) && !defined(GRAVITY_GPU) + GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); + #endif // GRAVITY and GRAVITY_GPU + + // Step 1: Use PCM reconstruction to put primitive variables into interface + // arrays + cuda_utilities::AutomaticLaunchParams static const pcm_launch_params(PCM_Reconstruction_3D, n_cells); + hipLaunchKernelGGL(PCM_Reconstruction_3D, pcm_launch_params.numBlocks, pcm_launch_params.threadsPerBlock, 0, 0, + dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); + GPU_Error_Check(); + + // Step 2: Calculate first-order upwind fluxes + #ifdef EXACT + cuda_utilities::AutomaticLaunchParams static const exact_launch_params(Calculate_Exact_Fluxes_CUDA, + n_cellsCalculate_Exact_Fluxes_CUDA); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, 
exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // EXACT + #ifdef ROE + cuda_utilities::AutomaticLaunchParams static const roe_launch_params(Calculate_Roe_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // ROE + #ifdef HLLC + cuda_utilities::AutomaticLaunchParams static const hllc_launch_params(Calculate_HLLC_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLL + cuda_utilities::AutomaticLaunchParams static const hll_launch_params(Calculate_HLL_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + 
hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLL + #ifdef HLLD + cuda_utilities::AutomaticLaunchParams static const hlld_launch_params(mhd::Calculate_HLLD_Fluxes_CUDA, n_cells); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lx, Q_Rx, &(dev_conserved[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Ly, Q_Ry, &(dev_conserved[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lz, Q_Rz, &(dev_conserved[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, + n_fields); + #endif // HLLD + GPU_Error_Check(); + + #ifdef MHD + // Step 2.5: Compute the Constrained transport electric fields + cuda_utilities::AutomaticLaunchParams static const ct_launch_params(mhd::Calculate_CT_Electric_Fields, n_cells); + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, ct_launch_params.numBlocks, ct_launch_params.threadsPerBlock, 0, + 0, F_x, F_y, F_z, dev_conserved, ctElectricFields, nx, ny, nz, n_cells); + GPU_Error_Check(); + #endif // MHD + + // Step 3: Update the conserved variables half a timestep + cuda_utilities::AutomaticLaunchParams static const update_half_launch_params(Update_Conserved_Variables_3D_half, + n_cells); + hipLaunchKernelGGL(Update_Conserved_Variables_3D_half, update_half_launch_params.numBlocks, + update_half_launch_params.threadsPerBlock, 0, 0, dev_conserved, 
dev_conserved_half, F_x, F_y, F_z, + nx, ny, nz, n_ghost, dx, dy, dz, 0.5 * dt, gama, n_fields, density_floor); + GPU_Error_Check(); + + #ifdef MHD + // Update the magnetic fields + cuda_utilities::AutomaticLaunchParams static const update_magnetic_launch_params(mhd::Update_Magnetic_Field_3D, + n_cells); + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.numBlocks, + update_magnetic_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved_half, + ctElectricFields, nx, ny, nz, n_cells, 0.5 * dt, dx, dy, dz); + GPU_Error_Check(); + #endif // MHD + + // Step 4: Construct left and right interface values using updated conserved + // variables + #ifdef PCM + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved_half, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); + #endif // PCM + #ifdef PLMP + cuda_utilities::AutomaticLaunchParams static const plmp_launch_params(PLMP_cuda, n_cells); + hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, plmp_launch_params.numBlocks, plmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); + #endif // PLMP + #ifdef PLMC + cuda_utilities::AutomaticLaunchParams static const plmc_vl_launch_params(PLMC_cuda, n_cells); + hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, 
Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PLMC_cuda, plmc_vl_launch_params.numBlocks, plmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, n_fields); + #endif // PLMC + #ifdef PPMP + cuda_utilities::AutomaticLaunchParams static const ppmp_launch_params(PPMP_cuda, n_cells); + hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); + hipLaunchKernelGGL(PPMP_cuda, ppmp_launch_params.numBlocks, ppmp_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); + #endif // PPMP + #ifdef PPMC + cuda_utilities::AutomaticLaunchParams static const ppmc_vl_launch_params(PPMC_VL, n_cells); + hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lx, Q_Rx, nx, ny, nz, gama, 0); + hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Ly, Q_Ry, nx, ny, nz, gama, 1); + hipLaunchKernelGGL(PPMC_VL, ppmc_vl_launch_params.numBlocks, ppmc_vl_launch_params.threadsPerBlock, 0, 0, + dev_conserved_half, Q_Lz, Q_Rz, nx, ny, nz, gama, 2); + #endif // PPMC + GPU_Error_Check(); + + // Step 5: Calculate the fluxes again + #ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, 
n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, exact_launch_params.numBlocks, exact_launch_params.threadsPerBlock, 0, + 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // EXACT + #ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, roe_launch_params.numBlocks, roe_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // ROE + #ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, hllc_launch_params.numBlocks, hllc_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLL + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, hll_launch_params.numBlocks, hll_launch_params.threadsPerBlock, 0, 0, + Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); + #endif // HLLC + #ifdef HLLD + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, 
hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lx, Q_Rx, &(dev_conserved_half[(grid_enum::magnetic_x)*n_cells]), F_x, n_cells, gama, 0, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Ly, Q_Ry, &(dev_conserved_half[(grid_enum::magnetic_y)*n_cells]), F_y, n_cells, gama, 1, + n_fields); + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, hlld_launch_params.numBlocks, hlld_launch_params.threadsPerBlock, + 0, 0, Q_Lz, Q_Rz, &(dev_conserved_half[(grid_enum::magnetic_z)*n_cells]), F_z, n_cells, gama, 2, + n_fields); + #endif // HLLD + GPU_Error_Check(); + #ifdef DE + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables_3D + cuda_utilities::AutomaticLaunchParams static const de_advect_launch_params(Partial_Update_Advected_Internal_Energy_3D, + n_cells); + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, de_advect_launch_params.numBlocks, + de_advect_launch_params.threadsPerBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, + nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); + GPU_Error_Check(); + #endif // DE + + #ifdef MHD + // Step 5.5: Compute the Constrained transport electric fields + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, ct_launch_params.numBlocks, ct_launch_params.threadsPerBlock, 0, + 0, F_x, F_y, F_z, dev_conserved_half, ctElectricFields, nx, ny, nz, n_cells); + GPU_Error_Check(); + #endif // MHD + + // Step 6: Update the conserved variable array + cuda_utilities::AutomaticLaunchParams static const update_full_launch_params(Update_Conserved_Variables_3D, n_cells); + hipLaunchKernelGGL(Update_Conserved_Variables_3D, update_full_launch_params.numBlocks, + update_full_launch_params.threadsPerBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, + F_x, F_y, F_z, nx, ny, nz, 
x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, + gama, n_fields, custom_grav, density_floor, dev_grav_potential); + GPU_Error_Check(); + + #ifdef MHD + // Update the magnetic fields + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.numBlocks, + update_magnetic_launch_params.threadsPerBlock, 0, 0, dev_conserved, dev_conserved, + ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); + GPU_Error_Check(); + #endif // MHD - // Step 6: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); - CudaCheckError(); + #ifdef DE + cuda_utilities::AutomaticLaunchParams static const de_select_launch_params(Select_Internal_Energy_3D, n_cells); + hipLaunchKernelGGL(Select_Internal_Energy_3D, de_select_launch_params.numBlocks, + de_select_launch_params.threadsPerBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); + cuda_utilities::AutomaticLaunchParams static const de_sync_launch_params(Sync_Energies_3D, n_cells); + hipLaunchKernelGGL(Sync_Energies_3D, de_sync_launch_params.numBlocks, de_sync_launch_params.threadsPerBlock, 0, 0, + dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); + GPU_Error_Check(); + #endif // DE - #ifdef DE - hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); - hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); - #endif - - #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); - CudaCheckError(); - #endif //TEMPERATURE_FLOOR return; - } - -void Free_Memory_VL_3D(){ - +void 
Free_Memory_VL_3D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(dev_conserved_half); @@ -208,128 +373,125 @@ void Free_Memory_VL_3D(){ cudaFree(F_x); cudaFree(F_y); cudaFree(F_z); - + cudaFree(ctElectricFields); } -__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, Real density_floor ) +__global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *dev_conserved_half, Real *dev_F_x, + Real *dev_F_y, Real *dev_F_z, int nx, int ny, int nz, int n_ghost, + Real dx, Real dy, Real dz, Real dt, Real gamma, int n_fields, + Real density_floor) { - Real dtodx = dt/dx; - Real dtody = dt/dy; - Real dtodz = dt/dz; - int n_cells = nx*ny*nz; + Real dtodx = dt / dx; + Real dtody = dt / dy; + Real dtodz = dt / dz; + int n_cells = nx * ny * nz; // get a global thread ID int tid = threadIdx.x + blockIdx.x * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - int id = xid + yid*nx + zid*nx*ny; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; + int id = xid + yid * nx + zid * nx * ny; - int imo = xid-1 + yid*nx + zid*nx*ny; - int jmo = xid + (yid-1)*nx + zid*nx*ny; - int kmo = xid + yid*nx + (zid-1)*nx*ny; + int imo = xid - 1 + yid * nx + zid * nx * ny; + int jmo = xid + (yid - 1) * nx + zid * nx * ny; + int kmo = xid + yid * nx + (zid - 1) * nx * ny; #ifdef DE Real d, d_inv, vx, vy, vz; Real vx_imo, vx_ipo, vy_jmo, vy_jpo, vz_kmo, vz_kpo, P, E, E_kin, GE; int ipo, jpo, kpo; - #endif - - #ifdef DENSITY_FLOOR - Real dens_0; - #endif + #endif // DE - // threads corresponding to all cells except outer ring of ghost cells do the calculation - if (xid > 0 && xid < nx-1 && yid > 0 && yid < ny-1 && zid > 0 && zid < nz-1) - { - #ifdef DE - d = 
dev_conserved[ id]; + // threads corresponding to all cells except outer ring of ghost cells do the + // calculation + if (xid > 0 && xid < nx - 1 && yid > 0 && yid < ny - 1 && zid > 0 && zid < nz - 1) { + #ifdef DE + d = dev_conserved[id]; d_inv = 1.0 / d; - vx = dev_conserved[1*n_cells + id] * d_inv; - vy = dev_conserved[2*n_cells + id] * d_inv; - vz = dev_conserved[3*n_cells + id] * d_inv; - //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - GE = dev_conserved[(n_fields-1)*n_cells + id]; - E_kin = 0.5 * d * ( vx*vx + vy*vy + vz*vz ); - P = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, GE, gamma ); - P = fmax(P, (Real) TINY_NUMBER); - // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - //if (d < 0.0 || d != d) printf("Negative density before half step update.\n"); - //if (P < 0.0) printf("%d Negative pressure before half step update.\n", id); - ipo = xid+1 + yid*nx + zid*nx*ny; - jpo = xid + (yid+1)*nx + zid*nx*ny; - kpo = xid + yid*nx + (zid+1)*nx*ny; - vx_imo = dev_conserved[1*n_cells + imo] / dev_conserved[imo]; - vx_ipo = dev_conserved[1*n_cells + ipo] / dev_conserved[ipo]; - vy_jmo = dev_conserved[2*n_cells + jmo] / dev_conserved[jmo]; - vy_jpo = dev_conserved[2*n_cells + jpo] / dev_conserved[jpo]; - vz_kmo = dev_conserved[3*n_cells + kmo] / dev_conserved[kmo]; - vz_kpo = dev_conserved[3*n_cells + kpo] / dev_conserved[kpo]; - #endif + vx = dev_conserved[1 * n_cells + id] * d_inv; + vy = dev_conserved[2 * n_cells + id] * d_inv; + vz = dev_conserved[3 * n_cells + id] * d_inv; + // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + GE = dev_conserved[(n_fields - 1) * n_cells + id]; + E_kin = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(d, vx, vy, vz); + #ifdef MHD + // Add the magnetic energy + auto const [centeredBx, centeredBy, centeredBz] = + mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + E_kin += mhd::utils::computeMagneticEnergy(centeredBx, centeredBy, 
centeredBz); + #endif // MHD + P = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, GE, gamma); + P = fmax(P, (Real)TINY_NUMBER); + // P = (dev_conserved[4*n_cells + id] - 0.5*d*(vx*vx + vy*vy + vz*vz)) * + // (gamma - 1.0); + // if (d < 0.0 || d != d) printf("Negative density before half step + // update.\n"); if (P < 0.0) printf("%d Negative pressure before half step + // update.\n", id); + ipo = xid + 1 + yid * nx + zid * nx * ny; + jpo = xid + (yid + 1) * nx + zid * nx * ny; + kpo = xid + yid * nx + (zid + 1) * nx * ny; + vx_imo = dev_conserved[1 * n_cells + imo] / dev_conserved[imo]; + vx_ipo = dev_conserved[1 * n_cells + ipo] / dev_conserved[ipo]; + vy_jmo = dev_conserved[2 * n_cells + jmo] / dev_conserved[jmo]; + vy_jpo = dev_conserved[2 * n_cells + jpo] / dev_conserved[jpo]; + vz_kmo = dev_conserved[3 * n_cells + kmo] / dev_conserved[kmo]; + vz_kpo = dev_conserved[3 * n_cells + kpo] / dev_conserved[kpo]; + #endif // DE // update the conserved variable array - dev_conserved_half[ id] = dev_conserved[ id] - + dtodx * (dev_F_x[ imo] - dev_F_x[ id]) - + dtody * (dev_F_y[ jmo] - dev_F_y[ id]) - + dtodz * (dev_F_z[ kmo] - dev_F_z[ id]); - dev_conserved_half[ n_cells + id] = dev_conserved[ n_cells + id] - + dtodx * (dev_F_x[ n_cells + imo] - dev_F_x[ n_cells + id]) - + dtody * (dev_F_y[ n_cells + jmo] - dev_F_y[ n_cells + id]) - + dtodz * (dev_F_z[ n_cells + kmo] - dev_F_z[ n_cells + id]); - dev_conserved_half[2*n_cells + id] = dev_conserved[2*n_cells + id] - + dtodx * (dev_F_x[2*n_cells + imo] - dev_F_x[2*n_cells + id]) - + dtody * (dev_F_y[2*n_cells + jmo] - dev_F_y[2*n_cells + id]) - + dtodz * (dev_F_z[2*n_cells + kmo] - dev_F_z[2*n_cells + id]); - dev_conserved_half[3*n_cells + id] = dev_conserved[3*n_cells + id] - + dtodx * (dev_F_x[3*n_cells + imo] - dev_F_x[3*n_cells + id]) - + dtody * (dev_F_y[3*n_cells + jmo] - dev_F_y[3*n_cells + id]) - + dtodz * (dev_F_z[3*n_cells + kmo] - dev_F_z[3*n_cells + id]); - dev_conserved_half[4*n_cells + id] = 
dev_conserved[4*n_cells + id] - + dtodx * (dev_F_x[4*n_cells + imo] - dev_F_x[4*n_cells + id]) - + dtody * (dev_F_y[4*n_cells + jmo] - dev_F_y[4*n_cells + id]) - + dtodz * (dev_F_z[4*n_cells + kmo] - dev_F_z[4*n_cells + id]); - #ifdef SCALAR - for (int i=0; i %f \n", dens_0, density_floor ); - dev_conserved_half[ id] = density_floor; + #endif // SCALAR + #ifdef DE + dev_conserved_half[(n_fields - 1) * n_cells + id] = + dev_conserved[(n_fields - 1) * n_cells + id] + + dtodx * (dev_F_x[(n_fields - 1) * n_cells + imo] - dev_F_x[(n_fields - 1) * n_cells + id]) + + dtody * (dev_F_y[(n_fields - 1) * n_cells + jmo] - dev_F_y[(n_fields - 1) * n_cells + id]) + + dtodz * (dev_F_z[(n_fields - 1) * n_cells + kmo] - dev_F_z[(n_fields - 1) * n_cells + id]) + + 0.5 * P * (dtodx * (vx_imo - vx_ipo) + dtody * (vy_jmo - vy_jpo) + dtodz * (vz_kmo - vz_kpo)); + #endif // DE + #ifdef DENSITY_FLOOR + if (dev_conserved_half[id] < density_floor) { + Real dens_0 = dev_conserved_half[id]; + printf("###Thread density change %f -> %f \n", dens_0, density_floor); + dev_conserved_half[id] = density_floor; // Scale the conserved values to the new density - dev_conserved_half[1*n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[2*n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[3*n_cells + id] *= (density_floor / dens_0); - dev_conserved_half[4*n_cells + id] *= (density_floor / dens_0); - #ifdef DE - dev_conserved_half[(n_fields-1)*n_cells + id] *= (density_floor / dens_0); - #endif + dev_conserved_half[1 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[2 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[3 * n_cells + id] *= (density_floor / dens_0); + dev_conserved_half[4 * n_cells + id] *= (density_floor / dens_0); + #ifdef DE + dev_conserved_half[(n_fields - 1) * n_cells + id] *= (density_floor / dens_0); + #endif // DE } - #endif - //if (dev_conserved_half[id] < 0.0 || dev_conserved_half[id] != dev_conserved_half[id] || 
dev_conserved_half[4*n_cells+id] < 0.0 || dev_conserved_half[4*n_cells+id] != dev_conserved_half[4*n_cells+id]) { - //printf("%3d %3d %3d Thread crashed in half step update. d: %e E: %e\n", xid, yid, zid, dev_conserved_half[id], dev_conserved_half[4*n_cells+id]); - //} - + #endif // DENSITY_FLOOR } - } - - - -#endif //VL -#endif //CUDA +#endif // VL diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 0d28710ab..4b80a4604 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -1,20 +1,15 @@ /*! \file VL_3D_cuda.h * \brief Declarations for the cuda version of the 3D VL algorithm. */ -#ifdef CUDA - #ifndef VL_3D_CUDA_H #define VL_3D_CUDA_H #include "../global/global.h" -void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, - int nx, int ny, int nz, int x_off, int y_off, - int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ); +void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential); void Free_Memory_VL_3D(); -#endif //VL_3D_CUDA_H -#endif //CUDA +#endif // VL_3D_CUDA_H diff --git a/src/integrators/simple_1D_cuda.cu b/src/integrators/simple_1D_cuda.cu index c1f209f01..80f26021a 100644 --- a/src/integrators/simple_1D_cuda.cu +++ b/src/integrators/simple_1D_cuda.cu @@ -1,124 +1,126 @@ /*! \file simple_1D_cuda.cu * \brief Definitions of the 1D simple algorithm functions. 
*/ -#ifdef CUDA - +#include #include #include -#include -#include "../utils/gpu.hpp" + #include "../global/global.h" #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" #include "../integrators/simple_1D_cuda.h" +#include "../io/io.h" #include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" #include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" +#include "../reconstruction/plmp_cuda.h" #include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/ppmp_cuda.h" #include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" #include "../riemann_solvers/hllc_cuda.h" +#include "../riemann_solvers/roe_cuda.h" #include "../utils/error_handling.h" -#include "../io/io.h" - - +#include "../utils/gpu.hpp" -void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields) +void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields, int custom_grav) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid - int n_cells = nx; - int ny = 1; - int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; + int n_cells = nx; + [[maybe_unused]] int ny = 1; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set the dimensions of the cuda grid dim3 dimGrid(ngrid, 1, 1); dim3 dimBlock(TPB, 1, 1); - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, (n_fields)*n_cells*sizeof(Real)) ); - - // 
If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, (n_fields)*n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } - // Step 1: Do the reconstruction - #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, n_fields); - CudaCheckError(); - #endif - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - CudaCheckError(); - #endif - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - CudaCheckError(); - #endif - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - CudaCheckError(); - #endif - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - CudaCheckError(); - #endif - - - // Step 2: Calculate the fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, 
dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - #endif - CudaCheckError(); - - #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, dx, dt, gama, n_fields ); - #endif - +// Step 1: Do the reconstruction +#ifdef PCM + hipLaunchKernelGGL(PCM_Reconstruction_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, n_ghost, gama, + n_fields); + GPU_Error_Check(); +#endif +#ifdef PLMP + hipLaunchKernelGGL(PLMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, + 0, n_fields); + GPU_Error_Check(); +#endif +#ifdef PLMC + hipLaunchKernelGGL(PLMC_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + GPU_Error_Check(); +#endif +#ifdef PPMP + hipLaunchKernelGGL(PPMP_cuda, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, + 0, n_fields); + GPU_Error_Check(); +#endif +#ifdef PPMC + hipLaunchKernelGGL(PPMC_CTU, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + GPU_Error_Check(); +#endif + +// Step 2: Calculate the fluxes +#ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); +#endif +#ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); +#endif +#ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, + n_fields); +#endif + 
GPU_Error_Check(); + +#ifdef DE + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, + n_ghost, dx, dt, gama, n_fields); +#endif // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, n_ghost, dx, xbound, dt, gama, n_fields); - CudaCheckError(); - + hipLaunchKernelGGL(Update_Conserved_Variables_1D, dimGrid, dimBlock, 0, 0, dev_conserved, F_x, n_cells, x_off, + n_ghost, dx, xbound, dt, gama, n_fields, custom_grav); + GPU_Error_Check(); - // Synchronize the total and internal energy, if using dual-energy formalism - #ifdef DE +// Synchronize the total and internal energy, if using dual-energy formalism +#ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_1D, dimGrid, dimBlock, 0, 0, dev_conserved, nx, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_1D, dimGrid, dimBlock, 0, 0, dev_conserved, n_cells, n_ghost, gama, n_fields); - CudaCheckError(); - #endif - + GPU_Error_Check(); +#endif return; - } -void Free_Memory_Simple_1D() { - +void Free_Memory_Simple_1D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(Q_Lx); cudaFree(Q_Rx); cudaFree(F_x); - } - - -#endif //CUDA diff --git a/src/integrators/simple_1D_cuda.h b/src/integrators/simple_1D_cuda.h index 6aba36059..82ccf0c29 100644 --- a/src/integrators/simple_1D_cuda.h +++ b/src/integrators/simple_1D_cuda.h @@ -1,16 +1,14 @@ /*! \file simple_1D_cuda.h * \brief Declarations for the 1D simple algorithm. 
*/ -#ifdef CUDA - #ifndef SIMPLE_1D_CUDA_H #define SIMPLE_1D_CUDA_H #include "../global/global.h" -void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, int n_fields); +void Simple_Algorithm_1D_CUDA(Real *d_conserved, int nx, int x_off, int n_ghost, Real dx, Real xbound, Real dt, + int n_fields, int custom_grav); void Free_Memory_Simple_1D(); -#endif //Simple_1D_CUDA_H -#endif //CUDA +#endif // Simple_1D_CUDA_H diff --git a/src/integrators/simple_2D_cuda.cu b/src/integrators/simple_2D_cuda.cu index 87cd87e58..97d435c51 100644 --- a/src/integrators/simple_2D_cuda.cu +++ b/src/integrators/simple_2D_cuda.cu @@ -1,120 +1,133 @@ /*! \file simple_2D_cuda.cu * \brief Definitions of the cuda 2D simple algorithm functions. */ -#ifdef CUDA - -#include #include -#include "../utils/gpu.hpp" +#include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../hydro/hydro_cuda.h" #include "../integrators/simple_2D_cuda.h" #include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" #include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" +#include "../reconstruction/plmp_cuda.h" #include "../reconstruction/ppmc_cuda.h" +#include "../reconstruction/ppmp_cuda.h" #include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" #include "../riemann_solvers/hllc_cuda.h" +#include "../riemann_solvers/roe_cuda.h" +#include "../utils/gpu.hpp" - - -void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields) +void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav) { - - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - int n_cells = nx*ny; - 
int nz = 1; - int ngrid = (n_cells + TPB - 1) / TPB; - + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array + int n_cells = nx * ny; + [[maybe_unused]] int nz = 1; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid dim3 dim2dGrid(ngrid, 1, 1); - //number of threads per 1D block + // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - if ( !memory_allocated ) { - + if (!memory_allocated) { // allocate memory on the GPU dev_conserved = d_conserved; - //CudaSafeCall( cudaMalloc((void**)&dev_conserved, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. 
+ // GPU_Error_Check( cudaMalloc((void**)&dev_conserved, + // n_fields*n_cells*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; } - // Step 1: Do the reconstruction - #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, gama, n_fields); - #endif - #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, 
dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - #endif - CudaCheckError(); - - - // Step 2: Calculate the fluxes - #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - #endif - CudaCheckError(); - - #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields ); - #endif +// Step 1: Do the reconstruction +#ifdef PCM + hipLaunchKernelGGL(PCM_Reconstruction_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, nx, ny, + n_ghost, gama, n_fields); +#endif +#ifdef PLMP + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim2dGrid, dim1dBlock, 0, 0, 
dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); +#endif +#ifdef PLMC + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, + n_fields); +#endif +#ifdef PPMP + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); +#endif +#ifdef PPMC + hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_CTU, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); +#endif + GPU_Error_Check(); + +// Step 2: Calculate the fluxes +#ifdef EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); +#endif +#ifdef ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); +#endif +#ifdef HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim2dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); +#endif + GPU_Error_Check(); + +#ifdef DE + // Compute the divergence of Vel before updating the conserved array, this + // solves 
synchronization issues when adding this term on + // Update_Conserved_Variables + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, nx, ny, n_ghost, dx, dy, dt, gama, n_fields); +#endif // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields); - CudaCheckError(); + hipLaunchKernelGGL(Update_Conserved_Variables_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, F_x, F_y, nx, ny, x_off, + y_off, n_ghost, dx, dy, xbound, ybound, dt, gama, n_fields, custom_grav); + GPU_Error_Check(); - // Synchronize the total and internal energy - #ifdef DE +// Synchronize the total and internal energy +#ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, n_fields); hipLaunchKernelGGL(Sync_Energies_2D, dim2dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, n_ghost, gama, n_fields); - CudaCheckError(); - #endif + GPU_Error_Check(); +#endif return; - } -void Free_Memory_Simple_2D() { - +void Free_Memory_Simple_2D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(Q_Lx); @@ -123,8 +136,4 @@ void Free_Memory_Simple_2D() { cudaFree(Q_Ry); cudaFree(F_x); cudaFree(F_y); - } - -#endif //CUDA - diff --git a/src/integrators/simple_2D_cuda.h b/src/integrators/simple_2D_cuda.h index 7a531f952..a381c553a 100644 --- a/src/integrators/simple_2D_cuda.h +++ b/src/integrators/simple_2D_cuda.h @@ -1,16 +1,14 @@ /*! \file simple_2D_cuda.h * \brief Declarations for the cuda version of the 2D simple algorithm. 
*/ -#ifdef CUDA - #ifndef SIMPLE_2D_CUDA_H #define SIMPLE_2D_CUDA_H #include "../global/global.h" -void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, Real xbound, Real ybound, Real dt, int n_fields); +void Simple_Algorithm_2D_CUDA(Real *d_conserved, int nx, int ny, int x_off, int y_off, int n_ghost, Real dx, Real dy, + Real xbound, Real ybound, Real dt, int n_fields, int custom_grav); void Free_Memory_Simple_2D(); -#endif //SIMPLE_2D_CUDA_H -#endif //CUDA +#endif // SIMPLE_2D_CUDA_H diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 1b854dea9..528eab04f 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -1,41 +1,37 @@ /*! \file simple_3D_cuda.cu * \brief Definitions of the cuda 3D simple algorithm functions. */ -#ifdef CUDA #ifdef SIMPLE -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../hydro/hydro_cuda.h" -#include "../integrators/simple_3D_cuda.h" -#include "../reconstruction/pcm_cuda.h" -#include "../reconstruction/plmp_cuda.h" -#include "../reconstruction/plmc_cuda.h" -#include "../reconstruction/ppmp_cuda.h" -#include "../reconstruction/ppmc_cuda.h" -#include "../riemann_solvers/exact_cuda.h" -#include "../riemann_solvers/roe_cuda.h" -#include "../riemann_solvers/hllc_cuda.h" -#include "../io/io.h" -#include "../riemann_solvers/hll_cuda.h" - - - -void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, - int nx, int ny, int nz, int x_off, int y_off, - int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ) + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../hydro/hydro_cuda.h" + #include "../integrators/simple_3D_cuda.h" + 
#include "../io/io.h" + #include "../reconstruction/pcm_cuda.h" + #include "../reconstruction/plmc_cuda.h" + #include "../reconstruction/plmp_cuda.h" + #include "../reconstruction/ppmc_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../riemann_solvers/exact_cuda.h" + #include "../riemann_solvers/hll_cuda.h" + #include "../riemann_solvers/hllc_cuda.h" + #include "../riemann_solvers/roe_cuda.h" + #include "../utils/gpu.hpp" + +void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential) { - //Here, *dev_conserved contains the entire - //set of conserved variables on the grid - //concatenated into a 1-d array - int n_cells = nx*ny*nz; - int ngrid = (n_cells + TPB - 1) / TPB; + // Here, *dev_conserved contains the entire + // set of conserved variables on the grid + // concatenated into a 1-d array + int n_cells = nx * ny * nz; + int ngrid = (n_cells + TPB - 1) / TPB; // set values for GPU kernels // number of blocks per 1D grid @@ -43,125 +39,150 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, // number of threads per 1D block dim3 dim1dBlock(TPB, 1, 1); - //host_grav_potential is NULL if not using GRAVITY + // host_grav_potential is NULL if not using GRAVITY temp_potential = host_grav_potential; - if ( !memory_allocated ){ + if (!memory_allocated) { size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); + // allocate memory on the GPU - chprintf( " Allocating Hydro Memory: nfields: %d n_cells: %d nx: %d ny: %d nz: %d \n", n_fields, n_cells, nx, ny, nz ); - chprintf( " Memory needed: %f GB Free: %f GB Total: %f GB \n", n_fields*n_cells*sizeof(Real)/1e9, global_free/1e9, 
global_total/1e9 ); + chprintf( + " Allocating Hydro Memory: nfields: %d n_cells: %d nx: %d ny: %d " + "nz: %d \n", + n_fields, n_cells, nx, ny, nz); + chprintf(" Memory needed: %f GB Free: %f GB Total: %f GB \n", n_fields * n_cells * sizeof(Real) / 1e9, + global_free / 1e9, global_total / 1e9); dev_conserved = d_conserved; - CudaSafeCall( cudaMalloc((void**)&Q_Lx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rx, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ly, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Ry, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Lz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&Q_Rz, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_x, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_y, n_fields*n_cells*sizeof(Real)) ); - CudaSafeCall( cudaMalloc((void**)&F_z, n_fields*n_cells*sizeof(Real)) ); - - #if defined( GRAVITY ) - // CudaSafeCall( cudaMalloc((void**)&dev_grav_potential, n_cells*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)&Q_Lx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rx, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ly, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Ry, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Lz, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&Q_Rz, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_x, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_y, n_fields * n_cells * sizeof(Real))); + GPU_Error_Check(cudaMalloc((void **)&F_z, n_fields * n_cells * sizeof(Real))); + + #if defined(GRAVITY) + // GPU_Error_Check( cudaMalloc((void**)&dev_grav_potential, + // n_cells*sizeof(Real)) ); dev_grav_potential = 
d_grav_potential; - #else + #else dev_grav_potential = NULL; - #endif + #endif - // If memory is single allocated: memory_allocated becomes true and successive timesteps won't allocate memory. - // If the memory is not single allocated: memory_allocated remains Null and memory is allocated every timestep. + // If memory is single allocated: memory_allocated becomes true and + // successive timesteps won't allocate memory. If the memory is not single + // allocated: memory_allocated remains Null and memory is allocated every + // timestep. memory_allocated = true; - chprintf( " Memory allocated \n" ); - + chprintf(" Memory allocated \n"); } - #if defined( GRAVITY ) && !defined( GRAVITY_GPU ) - CudaSafeCall( cudaMemcpy(dev_grav_potential, temp_potential, n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); + #if defined(GRAVITY) && !defined(GRAVITY_GPU) + GPU_Error_Check(cudaMemcpy(dev_grav_potential, temp_potential, n_cells * sizeof(Real), cudaMemcpyHostToDevice)); #endif - - // Step 1: Construct left and right interface values using updated conserved variables + // Step 1: Construct left and right interface values using updated conserved + // variables #ifdef PCM - hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); + hipLaunchKernelGGL(PCM_Reconstruction_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, + Q_Rz, nx, ny, nz, n_ghost, gama, n_fields); #endif #ifdef PLMP - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PLMP + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, 
dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); + hipLaunchKernelGGL(PLMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, + gama, 2, n_fields); + #endif // PLMP #ifdef PLMC - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1, + n_fields); + hipLaunchKernelGGL(PLMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2, + n_fields); #endif #ifdef PPMP - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - #endif //PPMP + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, + gama, 0, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, + gama, 1, n_fields); + hipLaunchKernelGGL(PPMP_cuda, dim1dGrid, dim1dBlock, 0, 0, 
dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, + gama, 2, n_fields); + #endif // PPMP #ifdef PPMC - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, n_ghost, dx, dt, gama, 0, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, n_ghost, dy, dt, gama, 1, n_fields); - hipLaunchKernelGGL(PPMC_cuda, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dz, dt, gama, 2, n_fields); - CudaCheckError(); - #endif //PPMC - - + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, nx, ny, nz, dx, dt, gama, 0); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Ly, Q_Ry, nx, ny, nz, dy, dt, gama, 1); + hipLaunchKernelGGL(PPMC_CTU, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lz, Q_Rz, nx, ny, nz, dz, dt, gama, 2); + GPU_Error_Check(); + #endif // PPMC + // Step 2: Calculate the fluxes #ifdef EXACT - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //EXACT + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_Exact_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); + #endif // EXACT #ifdef ROE - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, 
n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //ROE + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_Roe_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); + #endif // ROE #ifdef HLLC - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, n_fields); - #endif //HLLC + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, + gama, 0, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, + gama, 1, n_fields); + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, + gama, 2, n_fields); + #endif // HLLC #ifdef HLL - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, 0, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, 1, n_fields); - hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, 2, 
n_fields); - #endif //HLL - CudaCheckError(); - + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lx, Q_Rx, F_x, nx, ny, nz, n_ghost, gama, + 0, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Ly, Q_Ry, F_y, nx, ny, nz, n_ghost, gama, + 1, n_fields); + hipLaunchKernelGGL(Calculate_HLL_Fluxes_CUDA, dim1dGrid, dim1dBlock, 0, 0, Q_Lz, Q_Rz, F_z, nx, ny, nz, n_ghost, gama, + 2, n_fields); + #endif // HLL + GPU_Error_Check(); + #ifdef DE - // Compute the divergence of Vel before updating the conserved array, this solves synchronization issues when adding this term on Update_Conserved_Variables_3D - hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields ); - CudaCheckError(); + // Compute the divergence of Vel before updating the conserved array, this + // solves synchronization issues when adding this term on + // Update_Conserved_Variables_3D + hipLaunchKernelGGL(Partial_Update_Advected_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, + Q_Ly, Q_Ry, Q_Lz, Q_Rz, nx, ny, nz, n_ghost, dx, dy, dz, dt, gama, n_fields); + GPU_Error_Check(); #endif - + // Step 3: Update the conserved variable array - hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, zbound, dt, gama, n_fields, density_floor, dev_grav_potential); - CudaCheckError(); - + hipLaunchKernelGGL(Update_Conserved_Variables_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, Q_Lx, Q_Rx, Q_Ly, Q_Ry, + Q_Lz, Q_Rz, F_x, F_y, F_z, nx, ny, nz, x_off, y_off, z_off, n_ghost, dx, dy, dz, xbound, ybound, + zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); + GPU_Error_Check(); + #ifdef DE - 
hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); + hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, + n_fields); hipLaunchKernelGGL(Sync_Energies_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, gama, n_fields); - CudaCheckError(); + GPU_Error_Check(); #endif - - #ifdef TEMPERATURE_FLOOR - hipLaunchKernelGGL(Apply_Temperature_Floor, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, U_floor ); - CudaCheckError(); - #endif //TEMPERATURE_FLOOR - return; - } - -void Free_Memory_Simple_3D(){ - +void Free_Memory_Simple_3D() +{ // free the GPU memory cudaFree(dev_conserved); cudaFree(Q_Lx); @@ -173,11 +194,6 @@ void Free_Memory_Simple_3D(){ cudaFree(F_x); cudaFree(F_y); cudaFree(F_z); - } - - - -#endif //SIMPLE -#endif //CUDA +#endif // SIMPLE diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 9c904d2e7..847b93c61 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -1,21 +1,16 @@ /*! \file simple_3D_cuda.h * \brief Declarations for the cuda version of the 3D simple algorithm. 
*/ -#ifdef CUDA - #ifndef SIMPLE_3D_CUDA_H #define SIMPLE_3D_CUDA_H -#include"../global/global.h" -#include"../chemistry_gpu/chemistry_gpu.h" +#include "../chemistry_gpu/chemistry_gpu.h" +#include "../global/global.h" -void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, - int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, - Real dx, Real dy, Real dz, Real xbound, - Real ybound, Real zbound, Real dt, int n_fields, Real density_floor, - Real U_floor, Real *host_grav_potential ); +void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, + int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential); void Free_Memory_Simple_3D(); -#endif //SIMPLE_3D_CUDA_H -#endif //CUDA +#endif // SIMPLE_3D_CUDA_H diff --git a/src/io/io.cpp b/src/io/io.cpp index be0a1b9fa..536ede3c3 100644 --- a/src/io/io.cpp +++ b/src/io/io.cpp @@ -1,192 +1,217 @@ +#include +#include #include #include -#include #include -#include -#include -#include + #include #include +#include +#include +#include +#include #ifdef HDF5 -#include -#endif //HDF5 -#include "../io/io.h" + #include +#endif // HDF5 #include "../grid/grid3D.h" +#include "../io/io.h" +#include "../utils/cuda_utilities.h" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" +#include "../utils/timing_functions.h" // provides ScopedTimer #ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif //MPI_CHOLLA -#include "../utils/error_handling.h" + #include "../mpi/mpi_routines.h" +#endif // MPI_CHOLLA #include "../utils/DeviceVector.h" +#include "../utils/error_handling.h" #ifdef COSMOLOGY -#include "../cosmology/cosmology.h" -#endif //COSMOLOGY - -using namespace std; + #include "../cosmology/cosmology.h" +#endif // COSMOLOGY -//#define OUTPUT_ENERGY -//#define OUTPUT_MOMENTUM +// #define OUTPUT_ENERGY 
+// #define OUTPUT_MOMENTUM -/* function used to rotate points about an axis in 3D for the rotated projection output routine */ -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); +/* function used to rotate points about an axis in 3D for the rotated projection + * output routine */ +void Rotate_Point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp); -void Create_Log_File( struct parameters P ){ +/* local function that designates whether we are using a root-process. It gives + * gives a sensible result regardless of whether we are using MPI */ +static inline bool Is_Root_Proc() +{ +#ifdef MPI_CHOLLA + return procID == root; +#else + return true; +#endif +} - #ifdef MPI_CHOLLA - if ( procID != 0 ) return; - #endif +void Create_Log_File(struct Parameters P) +{ + if (not Is_Root_Proc()) { + return; + } - string file_name ( LOG_FILE_NAME ); - chprintf( "\nCreating Log File: %s \n\n", file_name.c_str() ); + std::string file_name(LOG_FILE_NAME); + chprintf("\nCreating Log File: %s \n\n", file_name.c_str()); bool file_exists = false; - if (FILE *file = fopen(file_name.c_str(), "r")){ + if (FILE *file = fopen(file_name.c_str(), "r")) { file_exists = true; - chprintf( " File exists, appending values: %s \n\n", file_name.c_str() ); - fclose( file ); + chprintf(" File exists, appending values: %s \n\n", file_name.c_str()); + fclose(file); } // current date/time based on current system time_t now = time(0); // convert now to string form - char* dt = ctime(&now); + char *dt = ctime(&now); - ofstream out_file; - out_file.open(file_name.c_str(), ios::app); + std::ofstream out_file; + out_file.open(file_name.c_str(), std::ios::app); out_file << "\n"; out_file << "Run date: " << dt; out_file.close(); - } -void Write_Message_To_Log_File( const char* message ){ - - #ifdef MPI_CHOLLA - if ( procID != 0 ) return; - #endif - +void Write_Message_To_Log_File(const char *message) +{ + if (not 
Is_Root_Proc()) { + return; + } - string file_name ( LOG_FILE_NAME ); - ofstream out_file; - out_file.open(file_name.c_str(), ios::app); - out_file << message << endl; - out_file.close(); + std::string file_name(LOG_FILE_NAME); + std::ofstream out_file; + out_file.open(file_name.c_str(), std::ios::app); + out_file << message << std::endl; + out_file.close(); } /* Write Cholla Output Data */ -void WriteData(Grid3D &G, struct parameters P, int nfile) +void Write_Data(Grid3D &G, struct Parameters P, int nfile) { + cudaMemcpy(G.C.density, G.C.device, G.H.n_fields * G.H.n_cells * sizeof(Real), cudaMemcpyDeviceToHost); - cudaMemcpy(G.C.density, G.C.device, G.H.n_fields*G.H.n_cells*sizeof(Real), cudaMemcpyDeviceToHost); + chprintf("\nSaving Snapshot: %d \n", nfile); - chprintf( "\nSaving Snapshot: %d \n", nfile ); + // ensure the output-directory exists (try to create it if it doesn't exist) + // -> Aside: it would be nice to pass an FnameTemplate instance into each function that uses it, + // rather than reconstructing it everywhere + Ensure_Dir_Exists(FnameTemplate(P).effective_output_dir_path(nfile)); - #ifdef HDF5 +#ifdef HDF5 // Initialize HDF5 interface H5open(); - #endif +#endif - #ifdef N_OUTPUT_COMPLETE - //If nfile is multiple of N_OUTPUT_COMPLETE then output all data - if ( nfile%N_OUTPUT_COMPLETE == 0 ){ +#ifdef N_OUTPUT_COMPLETE + // If nfile is multiple of N_OUTPUT_COMPLETE then output all data + if (nfile % N_OUTPUT_COMPLETE == 0) { G.H.Output_Complete_Data = true; - chprintf( " Writing all data ( Restart File ).\n"); - } - else{ + chprintf(" Writing all data ( Restart File ).\n"); + } else { G.H.Output_Complete_Data = false; } - #else - //If NOT N_OUTPUT_COMPLETE: always output complete data +#else + // If NOT N_OUTPUT_COMPLETE: always output complete data G.H.Output_Complete_Data = true; - #endif +#endif - #ifdef COSMOLOGY - G.Change_Cosmological_Frame_Sytem( false ); - #endif +#ifdef COSMOLOGY + G.Change_Cosmological_Frame_Sytem(false); +#endif - #ifndef 
ONLY_PARTICLES +#ifndef ONLY_PARTICLES /*call the data output routine for Hydro data*/ - if (nfile % P.n_hydro == 0) OutputData(G,P,nfile); - #endif + if (nfile % P.n_hydro == 0) { + Output_Data(G, P, nfile); + } +#endif - // This function does other checks to make sure it is valid (3D only) - #ifdef HDF5 - if (P.n_out_float32 && nfile % P.n_out_float32 == 0) OutputFloat32(G,P,nfile); - #endif +// This function does other checks to make sure it is valid (3D only) +#ifdef HDF5 + if (P.n_out_float32 && nfile % P.n_out_float32 == 0) { + Output_Float32(G, P, nfile); + } +#endif - #ifdef PROJECTION - if (nfile % P.n_projection == 0) OutputProjectedData(G,P,nfile); - #endif /*PROJECTION*/ +#ifdef PROJECTION + if (nfile % P.n_projection == 0) { + Output_Projected_Data(G, P, nfile); + } +#endif /*PROJECTION*/ - #ifdef ROTATED_PROJECTION - if (nfile % P.n_rotated_projection == 0) OutputRotatedProjectedData(G,P,nfile); - #endif /*ROTATED_PROJECTION*/ +#ifdef ROTATED_PROJECTION + if (nfile % P.n_rotated_projection == 0) { + Output_Rotated_Projected_Data(G, P, nfile); + } +#endif /*ROTATED_PROJECTION*/ - #ifdef SLICES - if (nfile % P.n_slice == 0) OutputSlices(G,P,nfile); - #endif /*SLICES*/ +#ifdef SLICES + if (nfile % P.n_slice == 0) { + Output_Slices(G, P, nfile); + } +#endif /*SLICES*/ - #ifdef PARTICLES - if (nfile % P.n_particle == 0) G.WriteData_Particles(P, nfile); - #endif +#ifdef PARTICLES + if (nfile % P.n_particle == 0) { + G.WriteData_Particles(P, nfile); + } +#endif - #ifdef COSMOLOGY - if ( G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial){ +#ifdef COSMOLOGY + if (G.H.OUTPUT_SCALE_FACOR || G.H.Output_Initial) { G.Cosmo.Set_Next_Scale_Output(); - if ( !G.Cosmo.exit_now ){ - chprintf( " Saved Snapshot: %d z:%f next_output: %f\n", nfile, G.Cosmo.current_z, 1/G.Cosmo.next_output - 1 ); + if (!G.Cosmo.exit_now) { + chprintf(" Saved Snapshot: %d z:%f next_output: %f\n", nfile, G.Cosmo.current_z, + 1 / G.Cosmo.next_output - 1); G.H.Output_Initial = false; - } - else{ - 
chprintf( " Saved Snapshot: %d z:%f Exiting now\n", nfile, G.Cosmo.current_z ); + } else { + chprintf(" Saved Snapshot: %d z:%f Exiting now\n", nfile, G.Cosmo.current_z); } + } else { + chprintf(" Saved Snapshot: %d z:%f\n", nfile, G.Cosmo.current_z); } - else chprintf( " Saved Snapshot: %d z:%f\n", nfile, G.Cosmo.current_z ); - G.Change_Cosmological_Frame_Sytem( true ); - chprintf( "\n" ); + G.Change_Cosmological_Frame_Sytem(true); + chprintf("\n"); G.H.Output_Now = false; - #endif +#endif - #ifdef HDF5 +#ifdef HDF5 // Cleanup HDF5 H5close(); - #endif +#endif - #ifdef MPI_CHOLLA +#if defined(GRAVITY) && defined(HDF5) + G.Grav.Write_Restart_HDF5(&P, nfile); +#endif + +#ifdef MPI_CHOLLA MPI_Barrier(world); - #endif +#endif } - /* Output the grid data to file. */ -void OutputData(Grid3D &G, struct parameters P, int nfile) +void Output_Data(Grid3D &G, struct Parameters P, int nfile) { - char filename[MAXLEN]; - char timestep[20]; - // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d", nfile); - strcat(filename, timestep); - #if defined BINARY - strcat(filename, ".bin"); - #elif defined HDF5 - strcat(filename, ".h5"); - #else - strcat(filename, ".txt"); - if (G.H.nx*G.H.ny*G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); - #endif - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif + std::string filename = FnameTemplate(P).format_fname(nfile, ""); + +#if !defined(BINARY) && !defined(HDF5) + if (G.H.nx * G.H.ny * G.H.nz > 1000) printf("Ascii outputs only recommended for small problems!\n"); +#endif - // open the file for binary writes - #if defined BINARY +// open the file for binary writes +#if defined BINARY FILE *out; - out = fopen(filename, "w"); - if(out == NULL) {printf("Error opening output file.\n"); exit(-1); } + out = fopen(filename.data(), "w"); + if (out == NULL) { + printf("Error opening output file.\n"); + exit(-1); + } // write the header to the output file 
G.Write_Header_Binary(out); @@ -197,13 +222,13 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // close the output file fclose(out); - // create the file for hdf5 writes - #elif defined HDF5 - hid_t file_id; /* file identifier */ - herr_t status; +// create the file for hdf5 writes +#elif defined HDF5 + hid_t file_id; /* file identifier */ + herr_t status; // Create a new file using default properties. - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write the header (file attributes) G.Write_Header_HDF5(file_id); @@ -214,13 +239,19 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // close the file status = H5Fclose(file_id); - if (status < 0) {printf("File write failed.\n"); exit(-1); } + if (status < 0) { + printf("File write failed.\n"); + exit(-1); + } - #else +#else // open the file for txt writes FILE *out; - out = fopen(filename, "w"); - if(out == NULL) {printf("Error opening output file.\n"); exit(-1); } + out = fopen(filename.data(), "w"); + if (out == NULL) { + printf("Error opening output file.\n"); + exit(-1); + } // write the header to the output file G.Write_Header_Text(out); @@ -230,12 +261,12 @@ void OutputData(Grid3D &G, struct parameters P, int nfile) // close the output file fclose(out); - #endif +#endif } -void OutputFloat32(Grid3D &G, struct parameters P, int nfile) +void Output_Float32(Grid3D &G, struct Parameters P, int nfile) { - +#ifdef HDF5 Header H = G.H; // Do nothing in 1-D and 2-D case if (H.ny_real == 1) { @@ -249,24 +280,15 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) return; } - char filename[MAXLEN]; - char timestep[20]; - // create the filename - sprintf(timestep, "%d", nfile); - strcpy(filename, P.outdir); - strcat(filename, timestep); - strcat(filename, ".float32.h5"); - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif + std::string filename = 
FnameTemplate(P).format_fname(nfile, ".float32"); // create hdf5 file - hid_t file_id; /* file identifier */ - herr_t status; + hid_t file_id; /* file identifier */ + herr_t status; // Create a new file using default properties. - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write the header (file attributes) G.Write_Header_HDF5(file_id); @@ -274,73 +296,98 @@ void OutputFloat32(Grid3D &G, struct parameters P, int nfile) // write the conserved variables to the output file // 3-D Case - if (H.nx>1 && H.ny>1 && H.nz>1) { + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { int nx_dset = H.nx_real; int ny_dset = H.ny_real; int nz_dset = H.nz_real; size_t buffer_size; - // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. - // If the buffer is too large, it does not cause bugs (Oct 6 2022) -#ifdef MHD - buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); -#else - buffer_size = nx_dset*ny_dset*nz_dset; -#endif + // Need a larger device buffer for MHD. In the future, if other fields need + // a larger device buffer, choose the maximum of the sizes. 
If the buffer is + // too large, it does not cause bugs (Oct 6 2022) + #ifdef MHD + buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); + #else + buffer_size = nx_dset * ny_dset * nz_dset; + #endif // MHD - // Using static DeviceVector here automatically allocates the buffer the first time it is needed - // It persists until program exit, and then calls Free upon destruction + // Using static DeviceVector here automatically allocates the buffer the + // first time it is needed It persists until program exit, and then calls + // Free upon destruction cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - float* device_dataset_buffer = device_dataset_vector.data(); - float* dataset_buffer = (float *) malloc(buffer_size*sizeof(float)); - - if (P.out_float32_density > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_density, "/density"); - if (P.out_float32_momentum_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_x, "/momentum_x"); - if (P.out_float32_momentum_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_y, "/momentum_y"); - if (P.out_float32_momentum_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_momentum_z, "/momentum_z"); - if (P.out_float32_Energy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_Energy, "/Energy"); -#ifdef DE - if (P.out_float32_GasEnergy > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, G.C.d_GasEnergy, "/GasEnergy"); -#endif //DE -#ifdef MHD - if (P.out_float32_magnetic_x > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, 
H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_x, "/magnetic_x"); - if (P.out_float32_magnetic_y > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_y, "/magnetic_y"); - if (P.out_float32_magnetic_z > 0) WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, G.C.d_magnetic_z, "/magnetic_z"); -#endif + auto *dataset_buffer = (float *)malloc(buffer_size * sizeof(float)); + if (P.out_float32_density > 0) { + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_density, "/density"); + } + if (P.out_float32_momentum_x > 0) { + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_x, "/momentum_x"); + } + if (P.out_float32_momentum_y > 0) { + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_y, "/momentum_y"); + } + if (P.out_float32_momentum_z > 0) { + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_momentum_z, "/momentum_z"); + } + if (P.out_float32_Energy > 0) { + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_Energy, "/Energy"); + } + #ifdef DE + if (P.out_float32_GasEnergy > 0) { + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_GasEnergy, "/GasEnergy"); + } + #endif // DE + #ifdef MHD - free(dataset_buffer); + // TODO (by Alwin, for anyone) : Repair output format if needed and remove these chprintfs when appropriate + if (P.out_float32_magnetic_x > 0) { + chprintf("WARNING: 
MHD float-32 output has a different output format than float-64\n"); + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_magnetic_x, "/magnetic_x"); + } + if (P.out_float32_magnetic_y > 0) { + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_magnetic_y, "/magnetic_y"); + } + if (P.out_float32_magnetic_z > 0) { + chprintf("WARNING: MHD float-32 output has a different output format than float-64\n"); + Write_HDF5_Field_3D(H.nx, H.ny, nx_dset + 1, ny_dset + 1, nz_dset + 1, H.n_ghost - 1, file_id, dataset_buffer, + device_dataset_vector.data(), G.C.d_magnetic_z, "/magnetic_z"); + } - if (status < 0) {printf("File write failed.\n"); exit(-1); } - } // 3-D case + #endif // MHD - // close the file - status = H5Fclose(file_id); + free(dataset_buffer); + if (status < 0) { + printf("File write failed.\n"); + exit(-1); + } + } // 3-D case + // close the file + status = H5Fclose(file_id); +#endif // HDF5 } - /* Output a projection of the grid data to file. 
*/ -void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) +void Output_Projected_Data(Grid3D &G, struct Parameters P, int nfile) { - char filename[100]; - char timestep[20]; - #ifdef HDF5 - hid_t file_id; - herr_t status; +#ifdef HDF5 + hid_t file_id; + herr_t status; // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d_proj", nfile); - strcat(filename,timestep); - strcat(filename,".h5"); - - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif /*MPI_CHOLLA*/ + std::string filename = FnameTemplate(P).format_fname(nfile, "_proj"); // Create a new file - file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + file_id = H5Fcreate(filename.data(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); // Write header (file attributes) G.Write_Header_HDF5(file_id); @@ -352,50 +399,44 @@ void OutputProjectedData(Grid3D &G, struct parameters P, int nfile) status = H5Fclose(file_id); #ifdef MPI_CHOLLA - if (status < 0) {printf("OutputProjectedData: File write failed. ProcID: %d\n", procID); chexit(-1); } + if (status < 0) { + printf("Output_Projected_Data: File write failed. ProcID: %d\n", procID); + chexit(-1); + } #else - if (status < 0) {printf("OutputProjectedData: File write failed.\n"); exit(-1); } + if (status < 0) { + printf("Output_Projected_Data: File write failed.\n"); + exit(-1); + } #endif - #else - printf("OutputProjected Data only defined for hdf5 writes.\n"); - #endif //HDF5 +#else + printf("Output_Projected_Data only defined for hdf5 writes.\n"); +#endif // HDF5 } - /* Output a rotated projection of the grid data to file. 
*/ -void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile) +void Output_Rotated_Projected_Data(Grid3D &G, struct Parameters P, int nfile) { - char filename[100]; - char timestep[20]; - #ifdef HDF5 - hid_t file_id; - herr_t status; +#ifdef HDF5 + hid_t file_id; + herr_t status; // create the filename - strcpy(filename, P.outdir); - sprintf(timestep, "%d_rot_proj", nfile); - strcat(filename,timestep); - strcat(filename,".h5"); - - #ifdef MPI_CHOLLA - sprintf(filename,"%s.%d",filename,procID); - #endif /*MPI_CHOLLA*/ + std::string filename = FnameTemplate(P).format_fname(nfile, "_rot_proj"); - if(G.R.flag_delta==1) - { - //if flag_delta==1, then we are just outputting a - //bunch of rotations of the same snapshot + if (G.R.flag_delta == 1) { + // if flag_delta==1, then we are just outputting a + // bunch of rotations of the same snapshot int i_delta; char fname[200]; - for(i_delta=0;i_delta1 && H.ny==1 && H.nz==1) { + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { fprintf(fp, "id\trho\tmx\tmy\tmz\tE"); - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD - #ifdef DE +#ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); +#endif // MHD +#ifdef DE fprintf(fp, "\tge"); - #endif +#endif fprintf(fp, "\n"); - for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) { + for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { id = i; - fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); - #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #endif //MHD - #ifdef DE + fprintf(fp, "%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], + C.momentum_z[id], C.Energy[id]); +#ifdef MHD + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); +#endif // MHD +#ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE +#endif // DE fprintf(fp, "\n"); } - #ifdef MHD - // Save the last line of 
magnetic fields - id = H.nx-H.n_ghost; - fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD +#ifdef MHD + // Save the last line of magnetic fields + id = H.nx - H.n_ghost; + fprintf(fp, "%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", id, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); + #ifdef DE + fprintf(fp, "\tNan"); + #endif // DE + fprintf(fp, "\n"); +#endif // MHD } // 2D case - else if (H.nx>1 && H.ny>1 && H.nz==1) { - + else if (H.nx > 1 && H.ny > 1 && H.nz == 1) { fprintf(fp, "idx\tidy\trho\tmx\tmy\tmz\tE"); - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD - #ifdef DE +#ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); +#endif // MHD +#ifdef DE fprintf(fp, "\tge"); - #endif +#endif fprintf(fp, "\n"); - for (i=H.n_ghost; i < H.nx-H.n_ghost; i++) { - for (j=H.n_ghost; j < H.ny-H.n_ghost; j++) { - id = i + j*H.nx; - fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, C.density[id], C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); - #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #endif //MHD - #ifdef DE + for (i = H.n_ghost; i < H.nx - H.n_ghost; i++) { + for (j = H.n_ghost; j < H.ny - H.n_ghost; j++) { + id = i + j * H.nx; + fprintf(fp, "%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, C.density[id], C.momentum_x[id], + C.momentum_y[id], C.momentum_z[id], C.Energy[id]); +#ifdef MHD + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); +#endif // MHD +#ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE +#endif // DE fprintf(fp, "\n"); } - #ifdef MHD - // Save the last line of magnetic fields - id = i + (H.ny-H.n_ghost)*H.nx; - fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i-H.n_ghost, H.ny-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], 
C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE - fprintf(fp, "\n"); - #endif //MHD - } - #ifdef MHD +#ifdef MHD // Save the last line of magnetic fields - id = H.nx-H.n_ghost + (H.ny-H.n_ghost)*H.nx; - fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx-2*H.n_ghost, H.ny-2*H.n_ghost, C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #ifdef DE - fprintf(fp, "\tNan"); - #endif //DE + id = i + (H.ny - H.n_ghost) * H.nx; + fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", i - H.n_ghost, H.ny - 2 * H.n_ghost, C.magnetic_x[id], + C.magnetic_y[id], C.magnetic_z[id]); + #ifdef DE + fprintf(fp, "\tNan"); + #endif // DE fprintf(fp, "\n"); - #endif //MHD +#endif // MHD + } +#ifdef MHD + // Save the last line of magnetic fields + id = H.nx - H.n_ghost + (H.ny - H.n_ghost) * H.nx; + fprintf(fp, "%d\t%d\tNan\tNan\tNan\tNan\tNan\t%f\t%f\t%f", H.nx - 2 * H.n_ghost, H.ny - 2 * H.n_ghost, + C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); + #ifdef DE + fprintf(fp, "\tNan"); + #endif // DE + fprintf(fp, "\n"); +#endif // MHD } // 3D case else { fprintf(fp, "idx\tidy\tidz\trho\tmx\tmy\tmz\tE"); - #ifdef DE +#ifdef DE fprintf(fp, "\tge"); - #endif - #ifdef MHD - fprintf(fp, "\tmagX\tmagY\tmagZ"); - #endif //MHD +#endif +#ifdef MHD + fprintf(fp, "\tmagX\tmagY\tmagZ"); +#endif // MHD fprintf(fp, "\n"); - for (i=H.n_ghost-1; i < H.nx-H.n_ghost; i++) { - for (j=H.n_ghost-1; j < H.ny-H.n_ghost; j++) { - for (k=H.n_ghost-1; k < H.nz-H.n_ghost; k++) { - id = i + j*H.nx + k*H.nx*H.ny; + for (i = H.n_ghost - 1; i < H.nx - H.n_ghost; i++) { + for (j = H.n_ghost - 1; j < H.ny - H.n_ghost; j++) { + for (k = H.n_ghost - 1; k < H.nz - H.n_ghost; k++) { + id = i + j * H.nx + k * H.nx * H.ny; // Exclude the rightmost ghost cell on the "left" side for the hydro // variables - if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) - { - fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost, C.density[id], 
C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); - #ifdef DE + if ((i >= H.n_ghost) and (j >= H.n_ghost) and (k >= H.n_ghost)) { + fprintf(fp, "%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f", i - H.n_ghost, j - H.n_ghost, k - H.n_ghost, C.density[id], + C.momentum_x[id], C.momentum_y[id], C.momentum_z[id], C.Energy[id]); +#ifdef DE fprintf(fp, "\t%f", C.GasEnergy[id]); - #endif //DE - } - else - { - fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i-H.n_ghost, j-H.n_ghost, k-H.n_ghost); - #ifdef DE - fprintf(fp, "\tn/a"); - #endif //DE +#endif // DE + } else { + fprintf(fp, "%d\t%d\t%d\tn/a\tn/a\tn/a\tn/a\tn/a", i - H.n_ghost, j - H.n_ghost, k - H.n_ghost); +#ifdef DE + fprintf(fp, "\tn/a"); +#endif // DE } - #ifdef MHD - fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); - #endif //MHD +#ifdef MHD + fprintf(fp, "\t%f\t%f\t%f", C.magnetic_x[id], C.magnetic_y[id], C.magnetic_z[id]); +#endif // MHD fprintf(fp, "\n"); } } @@ -1079,9 +1020,6 @@ void Grid3D::Write_Grid_Text(FILE *fp) } } - - - /*! \fn void Write_Grid_Binary(FILE *fp) * \brief Write the conserved quantities to a binary output file. 
*/ void Grid3D::Write_Grid_Binary(FILE *fp) @@ -1091,174 +1029,320 @@ void Grid3D::Write_Grid_Binary(FILE *fp) // Write the conserved quantities to the output file // 1D case - if (H.nx>1 && H.ny==1 && H.nz==1) { - + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { id = H.n_ghost; - fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp); + fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp); fwrite(&(C.momentum_x[id]), sizeof(Real), H.nx_real, fp); fwrite(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp); fwrite(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp); - fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); - #ifdef DE - fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); - #endif //DE + fwrite(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); +#ifdef DE + fwrite(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); +#endif // DE } // 2D case - else if (H.nx>1 && H.ny>1 && H.nz==1) { - - for (j=0; j 1 && H.ny > 1 && H.nz == 1) { + for (j = 0; j < H.ny_real; j++) { + id = H.n_ghost + (j + H.n_ghost) * H.nx; fwrite(&(C.density[id]), sizeof(Real), H.nx_real, fp); } - for (j=0; j 1 && ny > 1 && nz > 1) { + for (k = 0; k < nz_real; k++) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + buf_id = k + j * nz_real + i * nz_real * ny_real; + hdf5_buffer[buf_id] = grid_buffer[id]; + } + } + } + return; + } + + // 2D case + if (nx > 1 && ny > 1 && nz == 1) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx; + buf_id = j + i * ny_real; + hdf5_buffer[buf_id] = grid_buffer[id]; + } + } + return; + } + + // 1D case + if (nx > 1 && ny == 1 && nz == 1) { + id = n_ghost; + memcpy(&hdf5_buffer[0], &grid_buffer[id], nx_real * sizeof(Real)); + return; + } +} + +/* \brief Before HDF5 reads data into a buffer, remap and write grid to HDF5 buffer. 
*/ +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real *hdf5_buffer, Real *device_hdf5_buffer, Real *device_grid_buffer); +// From src/io/io_gpu + +// Set up dataspace for grid formatted data and write dataset +void Write_HDF5_Dataset_Grid(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, hid_t file_id, + Real *dataset_buffer, const char *name) +{ + // Set up dataspace + + hid_t dataspace_id; + // 1-D Case + if (nx > 1 && ny == 1 && nz == 1) { + int rank = 1; + hsize_t dims[1]; + dims[0] = nx_real; + dataspace_id = H5Screate_simple(rank, dims, NULL); + } + // 2-D Case + if (nx > 1 && ny > 1 && nz == 1) { + int rank = 2; + hsize_t dims[2]; + dims[0] = nx_real; + dims[1] = ny_real; + dataspace_id = H5Screate_simple(rank, dims, NULL); + } + // 3-D Case + if (nx > 1 && ny > 1 && nz > 1) { + int rank = 3; + hsize_t dims[3]; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; + dataspace_id = H5Screate_simple(rank, dims, NULL); + } + + // Write to HDF5 file + + Write_HDF5_Dataset(file_id, dataspace_id, dataset_buffer, name); + + // Close dataspace + herr_t status = H5Sclose(dataspace_id); +} + +// Data moves from host grid_buffer to dataset_buffer to hdf5 file +void Write_Grid_HDF5_Field_CPU(Header H, hid_t file_id, Real *dataset_buffer, Real *grid_buffer, const char *name) +{ + Fill_HDF5_Buffer_From_Grid_CPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, + grid_buffer); + Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); +} + +// Data moves from device_grid_buffer to device_hdf5_buffer to dataset_buffer to hdf5 file +void Write_Grid_HDF5_Field_GPU(Header H, hid_t file_id, Real *dataset_buffer, Real *device_hdf5_buffer, + Real *device_grid_buffer, const char *name) +{ + Fill_HDF5_Buffer_From_Grid_GPU(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, + 
device_hdf5_buffer, device_grid_buffer); + Write_HDF5_Dataset_Grid(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, file_id, dataset_buffer, name); +} + +void Write_Generic_HDF5_Field_CPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + hid_t file_id, Real *dataset_buffer, Real *source_buffer, const char *name) +{ + Fill_HDF5_Buffer_From_Grid_CPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, source_buffer); + Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); +} + +void Write_Generic_HDF5_Field_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + hid_t file_id, Real *dataset_buffer, Real *device_hdf5_buffer, Real *source_buffer, + const char *name) +{ + Fill_HDF5_Buffer_From_Grid_GPU(nx, ny, nz, nx_real, ny_real, nz_real, n_ghost, dataset_buffer, device_hdf5_buffer, + source_buffer); + Write_HDF5_Dataset_Grid(nx, ny, nz, nx_real, ny_real, nz_real, file_id, dataset_buffer, name); } /*! 
\fn void Write_Grid_HDF5(hid_t file_id) @@ -1266,562 +1350,361 @@ void Write_HDF5_Field_2D_CPU(Header H, hid_t file_id, hid_t dataspace_id, float* void Grid3D::Write_Grid_HDF5(hid_t file_id) { int i, j, k, id, buf_id; - hid_t dataset_id, dataspace_id; - hid_t dataset_id_full, dataspace_id_full; - Real *dataset_buffer; - herr_t status; + hid_t dataset_id, dataspace_id; + hid_t dataset_id_full, dataspace_id_full; + Real *dataset_buffer; + herr_t status; bool output_energy; bool output_momentum; - #ifdef OUTPUT_ENERGY output_energy = true; - #else // not OUTPUT_ENERGY + #else // not OUTPUT_ENERGY output_energy = false; - #endif //OUTPUT_ENERGY + #endif // OUTPUT_ENERGY #ifdef OUTPUT_MOMENTUM output_momentum = true; - #else // not OUTPUT_MOMENTUM + #else // not OUTPUT_MOMENTUM output_momentum = false; - #endif //OUTPUT_MOMENTUM + #endif // OUTPUT_MOMENTUM #if defined(COOLING_GRACKLE) || defined(CHEMISTRY_GPU) bool output_metals, output_electrons, output_full_ionization; - #ifdef OUTPUT_METALS + #ifdef OUTPUT_METALS output_metals = true; - #else // not OUTPUT_METALS + #else // not OUTPUT_METALS output_metals = false; - #endif //OUTPUT_METALS - #ifdef OUTPUT_ELECTRONS + #endif // OUTPUT_METALS + #ifdef OUTPUT_ELECTRONS output_electrons = true; - #else // not OUTPUT_ELECTRONS + #else // not OUTPUT_ELECTRONS output_electrons = false; - #endif //OUTPUT_ELECTRONS - #ifdef OUTPUT_FULL_IONIZATION + #endif // OUTPUT_ELECTRONS + #ifdef OUTPUT_FULL_IONIZATION output_full_ionization = true; - #else // not OUTPUT_FULL_IONIZATION + #else // not OUTPUT_FULL_IONIZATION output_full_ionization = false; - #endif //OUTPUT_FULL_IONIZATION + #endif // OUTPUT_FULL_IONIZATION - #endif // COOLING_GRACKLE or CHEMISTRY_GPU + #endif // COOLING_GRACKLE or CHEMISTRY_GPU - #if defined(GRAVITY_GPU) && defined(OUTPUT_POTENTIAL) - CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif//GRAVITY_GPU and OUTPUT_POTENTIAL - 
- - - // 1D case - if (H.nx>1 && H.ny==1 && H.nz==1) { - - int nx_dset = H.nx_real; - hsize_t dims[1]; - dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real)); - - // Create the data space for the datasets - dims[0] = nx_dset; - dataspace_id = H5Screate_simple(1, dims, NULL); - - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); - - #ifdef SCALAR - for (int s=0; s 1 this substitution can be attempted. - // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); - - id = H.n_ghost; - memcpy(&dataset_buffer[0], &(C.scalar[id+s*H.n_cells]), H.nx_real*sizeof(Real)); - // dataset here is just a name - status = HDF5_Dataset(file_id, dataspace_id, dataset_buffer, dataset); - } - - #endif //SCALAR + // Allocate necessary buffers + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + #ifdef MHD + size_t buffer_size = (nx_dset + 1) * (ny_dset + 1) * (nz_dset + 1); + #else + size_t buffer_size = nx_dset * ny_dset * nz_dset; + #endif + cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; + dataset_buffer = (Real *)malloc(buffer_size * sizeof(Real)); - #ifdef DE - Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, C.GasEnergy, "/GasEnergy"); - #endif //DE + // Start writing fields - // Free the dataspace id - status = H5Sclose(dataspace_id); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_density, "/density"); + if (output_momentum || H.Output_Complete_Data) { + Write_Grid_HDF5_Field_GPU(H, file_id, 
dataset_buffer, device_dataset_vector.data(), C.d_momentum_x, "/momentum_x"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_y, "/momentum_y"); + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_momentum_z, "/momentum_z"); } - - - // 2D case - if (H.nx>1 && H.ny>1 && H.nz==1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - hsize_t dims[2]; - dataset_buffer = (Real *) malloc(H.ny_real*H.nx_real*sizeof(Real)); - - // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; - dataspace_id = H5Screate_simple(2, dims, NULL); - - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.density, "/density"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_x, "/momentum_x"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_y, "/momentum_y"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.momentum_z, "/momentum_z"); - Write_HDF5_Field_2D_CPU(H, file_id, dataspace_id, dataset_buffer, C.Energy, "/Energy"); - - #ifdef SCALAR - for (int s=0; s 1 this substitution can be attempted. - // Write_HDF5_Field_1D_CPU(H, file_id, dataspace_id, dataset_buffer, &(C.scalar[s*H.n_cells]), dataset); - - // Copy the scalar array to the memory buffer - for (j=0; j1 && H.ny>1 && H.nz>1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[3]; - hsize_t dims_full[3]; + #ifdef SCALAR - size_t buffer_size; - // Need a larger device buffer for MHD. In the future, if other fields need a larger device buffer, choose the maximum of the sizes. 
- // If the buffer is too large, it does not cause bugs (Oct 6 2022) - #ifdef MHD - buffer_size = (nx_dset+1)*(ny_dset+1)*(nz_dset+1); - #else - buffer_size = nx_dset*ny_dset*nz_dset; - #endif - // Using static DeviceVector here automatically allocates the buffer the first time it is needed - // It persists until program exit, and then calls Free upon destruction - cuda_utilities::DeviceVector static device_dataset_vector{buffer_size}; - double* device_dataset_buffer = device_dataset_vector.data(); - dataset_buffer = (Real*) malloc(buffer_size*sizeof(Real)); - //CudaSafeCall(cudaMalloc(&device_dataset_buffer,nx_dset*ny_dset*nz_dset*sizeof(double))); - - - // Create the data space for the datasets (note: WriteHDF5Field3D creates its own dataspace, does not use the shared one) - dims[0] = nx_dset; - dims[1] = ny_dset; - dims[2] = nz_dset; - dataspace_id = H5Screate_simple(3, dims, NULL); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_density, "/density"); - if ( output_momentum || H.Output_Complete_Data ) { - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_x, "/momentum_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_y, "/momentum_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_momentum_z, "/momentum_z"); - } + #ifdef BASIC_SCALAR + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, device_dataset_vector.data(), C.d_basic_scalar, "/scalar0"); + #endif // BASIC_SCALAR - if ( output_energy || H.Output_Complete_Data ){ - WriteHDF5Field3D(H.nx, H.ny, nx_dset, ny_dset, nz_dset, H.n_ghost, file_id, dataset_buffer, device_dataset_buffer, C.d_Energy, "/Energy"); - } + #ifdef DUST + Write_Grid_HDF5_Field_GPU(H, file_id, dataset_buffer, 
device_dataset_vector.data(), C.d_dust_density, + "/dust_density"); + #endif // DUST - #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont write scalars when using grackle - for (int s=0; s 1 && H.ny > 1 && H.nz > 1) { + #if defined(GRAVITY) && defined(OUTPUT_POTENTIAL) + Write_Generic_HDF5_Field_GPU(Grav.nx_local + 2 * N_GHOST_POTENTIAL, Grav.ny_local + 2 * N_GHOST_POTENTIAL, + Grav.nz_local + 2 * N_GHOST_POTENTIAL, Grav.nx_local, Grav.ny_local, Grav.nz_local, + N_GHOST_POTENTIAL, file_id, dataset_buffer, device_dataset_vector.data(), + Grav.F.potential_d, "/grav_potential"); + #endif // GRAVITY and OUTPUT_POTENTIAL - #ifdef MHD + #ifdef MHD if (H.Output_Complete_Data) { - // Note: for WriteHDF5Field3D, use the left side n_ghost - WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_x, "/magnetic_x"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_y, "/magnetic_y"); - WriteHDF5Field3D(H.nx, H.ny, nx_dset+1, ny_dset+1, nz_dset+1, H.n_ghost-1, file_id, dataset_buffer, device_dataset_buffer, C.d_magnetic_z, "/magnetic_z"); - } - #endif //MHD - - // Free the dataspace id - status = H5Sclose(dataspace_id); - //CudaSafeCall(cudaFree(device_dataset_buffer));// No longer needed because devicevector frees when it should + Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real + 1, H.ny_real, H.nz_real, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), C.d_magnetic_x, "/magnetic_x", 0); + Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real + 1, H.nz_real, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), C.d_magnetic_y, "/magnetic_y", 1); + Write_HDF5_Field_3D(H.nx, H.ny, H.nx_real, H.ny_real, H.nz_real + 1, H.n_ghost, file_id, dataset_buffer, + device_dataset_vector.data(), C.d_magnetic_z, "/magnetic_z", 2); + } + #endif // MHD } + 
free(dataset_buffer); } -#endif //HDF5 - +#endif // HDF5 #ifdef HDF5 /*! \fn void Write_Projection_HDF5(hid_t file_id) - * \brief Write projected density and temperature data to a file, at the current simulation time. */ + * \brief Write projected density and temperature data to a file, at the + * current simulation time. */ void Grid3D::Write_Projection_HDF5(hid_t file_id) { - int i, j, k, id, buf_id; - hid_t dataset_id, dataspace_xy_id, dataspace_xz_id; - Real *dataset_buffer_dxy, *dataset_buffer_dxz; - Real *dataset_buffer_Txy, *dataset_buffer_Txz; - herr_t status; - Real dxy, dxz, Txy, Txz, n, T; - + hid_t dataset_id, dataspace_xy_id, dataspace_xz_id; + Real *dataset_buffer_dxy, *dataset_buffer_dxz; + Real *dataset_buffer_Txy, *dataset_buffer_Txz; + herr_t status; + Real dxy, dxz, Txy, Txz; + #ifdef DUST + Real dust_xy, dust_xz; + Real *dataset_buffer_dust_xy, *dataset_buffer_dust_xz; + #endif - n = T = 0; Real mu = 0.6; // 3D - if (H.nx>1 && H.ny>1 && H.nz>1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[2]; - dataset_buffer_dxy = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_dxz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real)); - dataset_buffer_Txy = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_Txz = (Real *) malloc(H.nx_real*H.nz_real*sizeof(Real)); + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + hsize_t dims[2]; + dataset_buffer_dxy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_dxz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + dataset_buffer_Txy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_Txz = (Real *)malloc(H.nx_real * H.nz_real * sizeof(Real)); + #ifdef DUST + dataset_buffer_dust_xy = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_dust_xz = (Real *)malloc(H.nx_real * H.nz_real * 
sizeof(Real)); + #endif // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; + dims[0] = nx_dset; + dims[1] = ny_dset; dataspace_xy_id = H5Screate_simple(2, dims, NULL); - dims[1] = nz_dset; + dims[1] = nz_dset; dataspace_xz_id = H5Screate_simple(2, dims, NULL); // Copy the xy density and temperature projections to the memory buffer - for (j=0; j1 && H.ny>1 && H.nz>1) { - - Real Lx = R.Lx; //projected box size in x dir - Real Lz = R.Lz; //projected box size in z dir + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + Real Lx = R.Lx; // projected box size in x dir + Real Lz = R.Lz; // projected box size in z dir int nx_dset = R.nx; int nz_dset = R.nz; if (R.nx * R.nz == 0) { - chprintf("WARNING: compiled with -DROTATED_PROJECTION but input parameters nxr or nzr = 0\n"); + chprintf( + "WARNING: compiled with -DROTATED_PROJECTION but input parameters " + "nxr or nzr = 0\n"); return; } @@ -1829,117 +1712,121 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) // this piece of the simulation volume // min and max values were set in the header write int nx_min, nx_max, nz_min, nz_max; - nx_min = R.nx_min; - nx_max = R.nx_max; - nz_min = R.nz_min; - nz_max = R.nz_max; - nx_dset = nx_max-nx_min; - nz_dset = nz_max-nz_min; + nx_min = R.nx_min; + nx_max = R.nx_max; + nz_min = R.nz_min; + nz_max = R.nz_max; + nx_dset = nx_max - nx_min; + nz_dset = nz_max - nz_min; - hsize_t dims[2]; + hsize_t dims[2]; // allocate the buffers for the projected dataset // and initialize to zero - dataset_buffer_dxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_Txzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_vxxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_vyxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); - dataset_buffer_vzxzr = (Real *) calloc(nx_dset*nz_dset,sizeof(Real)); + dataset_buffer_dxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_Txzr = (Real *)calloc(nx_dset 
* nz_dset, sizeof(Real)); + dataset_buffer_vxxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_vyxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); + dataset_buffer_vzxzr = (Real *)calloc(nx_dset * nz_dset, sizeof(Real)); // Create the data space for the datasets - dims[0] = nx_dset; - dims[1] = nz_dset; + dims[0] = nx_dset; + dims[1] = nz_dset; dataspace_xzr_id = H5Screate_simple(2, dims, NULL); // Copy the xz rotated projection to the memory buffer - for (k=0; k=0)&&(ix=0)&&(iz= 0) && (ix < nx_dset) && (iz >= 0) && (iz < nz_dset)) { + int const buf_id = iz + ix * nz_dset; + d = C.density[id]; // project density - dataset_buffer_dxzr[buf_id] += d*H.dy; + dataset_buffer_dxzr[buf_id] += d * H.dy; // calculate number density - n = d*DENSITY_UNIT/(mu*MP); - // calculate temperature - #ifndef DE - Real mx = C.momentum_x[id]; - Real my = C.momentum_y[id]; - Real mz = C.momentum_z[id]; - Real E = C.Energy[id]; - T = (E - 0.5*(mx*mx + my*my + mz*mz)/C.density[id])*(gama-1.0)*PRESSURE_UNIT / (n*KB); - #endif - #ifdef DE - T = C.GasEnergy[id]*PRESSURE_UNIT*(gama-1.0) / (n*KB); - #endif - Txz = T*d*H.dy; + Real const n = d * DENSITY_UNIT / (mu * MP); + + // calculate temperature + #ifdef DE + Real const T = hydro_utilities::Calc_Temp_DE(C.GasEnergy[id], gama, n); + #else // DE is not defined + Real const mx = C.momentum_x[id]; + Real const my = C.momentum_y[id]; + Real const mz = C.momentum_z[id]; + Real const E = C.Energy[id]; + + #ifdef MHD + auto const [magnetic_x, magnetic_y, magnetic_z] = + mhd::utils::cellCenteredMagneticFields(C.host, id, xid, yid, zid, H.n_cells, H.nx, H.ny); + Real const T = + hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n, magnetic_x, magnetic_y, magnetic_z); + #else // MHD is not defined + Real const T = hydro_utilities::Calc_Temp_Conserved(E, d, mx, my, mz, gama, n); + #endif // MHD + #endif // DE + + Txz = T * d * H.dy; dataset_buffer_Txzr[buf_id] += Txz; - //compute velocities - vx = C.momentum_x[id]; - 
dataset_buffer_vxxzr[buf_id] += vx*H.dy; - vy = C.momentum_y[id]; - dataset_buffer_vyxzr[buf_id] += vy*H.dy; - vz = C.momentum_z[id]; - dataset_buffer_vzxzr[buf_id] += vz*H.dy; + // compute velocities + dataset_buffer_vxxzr[buf_id] += C.momentum_x[id] * H.dy; + dataset_buffer_vyxzr[buf_id] += C.momentum_y[id] * H.dy; + dataset_buffer_vzxzr[buf_id] += C.momentum_z[id] * H.dy; } } } } // Write projected d,T,vx,vy,vz - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr"); - status = HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_dxzr, "/d_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_Txzr, "/T_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vxxzr, "/vx_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vyxzr, "/vy_xzr"); + status = Write_HDF5_Dataset(file_id, dataspace_xzr_id, dataset_buffer_vzxzr, "/vz_xzr"); // Free the dataspace id status = H5Sclose(dataspace_xzr_id); - //free the data + // free the data free(dataset_buffer_dxzr); free(dataset_buffer_Txzr); free(dataset_buffer_vxxzr); free(dataset_buffer_vyxzr); free(dataset_buffer_vzxzr); + } else { + chprintf("Rotated projection write only implemented for 3D data.\n"); } - else chprintf("Rotated projection write only implemented for 3D data.\n"); - - - } -#endif //HDF5 - +#endif // HDF5 #ifdef HDF5 /*! 
\fn void Write_Slices_HDF5(hid_t file_id) @@ -1948,81 +1835,103 @@ void Grid3D::Write_Rotated_Projection_HDF5(hid_t file_id) void Grid3D::Write_Slices_HDF5(hid_t file_id) { int i, j, k, id, buf_id; - hid_t dataset_id, dataspace_id; - Real *dataset_buffer_d; - Real *dataset_buffer_mx; - Real *dataset_buffer_my; - Real *dataset_buffer_mz; - Real *dataset_buffer_E; + hid_t dataset_id, dataspace_id; + Real *dataset_buffer_d; + Real *dataset_buffer_mx; + Real *dataset_buffer_my; + Real *dataset_buffer_mz; + Real *dataset_buffer_E; #ifdef DE - Real *dataset_buffer_GE; + Real *dataset_buffer_GE; #endif #ifdef SCALAR - Real *dataset_buffer_scalar; + Real *dataset_buffer_scalar; #endif - herr_t status; + herr_t status; int xslice, yslice, zslice; - xslice = H.nx/2; - yslice = H.ny/2; - zslice = H.nz/2; + xslice = H.nx / 2; + yslice = H.ny / 2; + zslice = H.nz / 2; #ifdef MPI_CHOLLA - xslice = nx_global/2; - yslice = ny_global/2; - zslice = nz_global/2; + xslice = nx_global / 2; + yslice = ny_global / 2; + zslice = nz_global / 2; #endif - // 3D - if (H.nx>1 && H.ny>1 && H.nz>1) { - - int nx_dset = H.nx_real; - int ny_dset = H.ny_real; - int nz_dset = H.nz_real; - hsize_t dims[2]; - + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { + int nx_dset = H.nx_real; + int ny_dset = H.ny_real; + int nz_dset = H.nz_real; + hsize_t dims[2]; // Create the xy data space for the datasets - dims[0] = nx_dset; - dims[1] = ny_dset; + dims[0] = nx_dset; + dims[1] = ny_dset; dataspace_id = H5Screate_simple(2, dims, NULL); // Allocate memory for the xy slices - dataset_buffer_d = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_mx = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_my = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_mz = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - dataset_buffer_E = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - #ifdef DE - dataset_buffer_GE = (Real *) malloc(H.nx_real*H.ny_real*sizeof(Real)); - 
#endif - #ifdef SCALAR - dataset_buffer_scalar = (Real *) malloc(NSCALARS*H.nx_real*H.ny_real*sizeof(Real)); - #endif + dataset_buffer_d = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_mx = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_my = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_mz = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + dataset_buffer_E = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + #ifdef MHD + std::vector dataset_buffer_magnetic_x(H.nx_real * H.ny_real); + std::vector dataset_buffer_magnetic_y(H.nx_real * H.ny_real); + std::vector dataset_buffer_magnetic_z(H.nx_real * H.ny_real); + #endif // MHD + #ifdef DE + dataset_buffer_GE = (Real *)malloc(H.nx_real * H.ny_real * sizeof(Real)); + #endif + #ifdef SCALAR + dataset_buffer_scalar = (Real *)malloc(NSCALARS * H.nx_real * H.ny_real * sizeof(Real)); + #endif // Copy the xy slices to the memory buffers - for (j=0; j= nz_local_start && zslice < nz_local_start+nz_local) { - id = (i+H.n_ghost) + (j+H.n_ghost)*H.nx + (zslice-nz_local_start+H.n_ghost)*H.nx*H.ny; - #endif //MPI_CHOLLA + for (j = 0; j < H.ny_real; j++) { + for (i = 0; i < H.nx_real; i++) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice, H.nx, H.ny); + buf_id = j + i * H.ny_real; + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, j + H.n_ghost, zslice, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost - 1, zslice, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice - 1, H.nx, H.ny); + #endif // MHD + #ifdef MPI_CHOLLA + // When there are multiple processes, check whether this slice is in + // your domain + if (zslice >= nz_local_start && zslice < nz_local_start + nz_local) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, zslice - nz_local_start + H.n_ghost, H.nx, + H.ny); + #ifdef MHD + int 
id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, j + H.n_ghost, + zslice - nz_local_start + H.n_ghost, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost - 1, + zslice - nz_local_start + H.n_ghost, H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, j + H.n_ghost, + zslice - nz_local_start + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD + #endif // MPI_CHOLLA dataset_buffer_d[buf_id] = C.density[id]; dataset_buffer_mx[buf_id] = C.momentum_x[id]; dataset_buffer_my[buf_id] = C.momentum_y[id]; dataset_buffer_mz[buf_id] = C.momentum_z[id]; dataset_buffer_E[buf_id] = C.Energy[id]; - #ifdef DE + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]); + dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]); + dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]); + #endif // MHD + #ifdef DE dataset_buffer_GE[buf_id] = C.GasEnergy[id]; - #endif - #ifdef SCALAR - for (int ii=0; ii= ny_local_start && yslice < ny_local_start+ny_local) { - id = (i+H.n_ghost) + (yslice-ny_local_start+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; - #endif //MPI_CHOLLA - dataset_buffer_d[buf_id] = C.density[id]; - dataset_buffer_mx[buf_id] = C.momentum_x[id]; - dataset_buffer_my[buf_id] = C.momentum_y[id]; - dataset_buffer_mz[buf_id] = C.momentum_z[id]; - dataset_buffer_E[buf_id] = C.Energy[id]; - #ifdef DE - dataset_buffer_GE[buf_id] = C.GasEnergy[id]; - #endif - #ifdef SCALAR - for (int ii=0; ii= ny_local_start && yslice < ny_local_start + ny_local) { + id = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost, k + H.n_ghost, H.nx, + H.ny); + #ifdef MHD + int id_xm1 = cuda_utilities::compute1DIndex(i + H.n_ghost - 1, yslice - ny_local_start + H.n_ghost, + k + H.n_ghost, H.nx, H.ny); + int id_ym1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost - 1, + k + H.n_ghost, 
H.nx, H.ny); + int id_zm1 = cuda_utilities::compute1DIndex(i + H.n_ghost, yslice - ny_local_start + H.n_ghost, + k + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD + #endif // MPI_CHOLLA + dataset_buffer_d[buf_id] = C.density[id]; + dataset_buffer_mx[buf_id] = C.momentum_x[id]; + dataset_buffer_my[buf_id] = C.momentum_y[id]; + dataset_buffer_mz[buf_id] = C.momentum_z[id]; + dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]); + dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]); + dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]); + #endif // MHD + #ifdef DE + dataset_buffer_GE[buf_id] = C.GasEnergy[id]; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.nx * H.nz] = C.scalar[id + ii * H.n_cells]; + } + #endif + #ifdef MPI_CHOLLA } // if the slice isn't in your domain, just write out zeros else { @@ -2124,31 +2066,41 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; - #ifdef DE + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0; + dataset_buffer_magnetic_y[buf_id] = 0; + dataset_buffer_magnetic_z[buf_id] = 0; + #endif // MHD + #ifdef DE dataset_buffer_GE[buf_id] = 0; - #endif - #ifdef SCALAR - for (int ii=0; ii= nx_local_start && xslice < nx_local_start+nx_local) { - id = (xslice-nx_local_start) + (j+H.n_ghost)*H.nx + (k+H.n_ghost)*H.nx*H.ny; - #endif //MPI_CHOLLA - dataset_buffer_d[buf_id] = C.density[id]; - dataset_buffer_mx[buf_id] = C.momentum_x[id]; - dataset_buffer_my[buf_id] = C.momentum_y[id]; - dataset_buffer_mz[buf_id] = C.momentum_z[id]; - dataset_buffer_E[buf_id] = C.Energy[id]; - #ifdef DE - dataset_buffer_GE[buf_id] = C.GasEnergy[id]; - #endif - #ifdef SCALAR - for (int ii=0; ii= nx_local_start && xslice < nx_local_start + nx_local) { + id = 
cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny); + #ifdef MHD + int id_xm1 = + cuda_utilities::compute1DIndex(xslice - nx_local_start - 1, j + H.n_ghost, k + H.n_ghost, H.nx, H.ny); + int id_ym1 = + cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost - 1, k + H.n_ghost, H.nx, H.ny); + int id_zm1 = + cuda_utilities::compute1DIndex(xslice - nx_local_start, j + H.n_ghost, k + H.n_ghost - 1, H.nx, H.ny); + #endif // MHD + #endif // MPI_CHOLLA + dataset_buffer_d[buf_id] = C.density[id]; + dataset_buffer_mx[buf_id] = C.momentum_x[id]; + dataset_buffer_my[buf_id] = C.momentum_y[id]; + dataset_buffer_mz[buf_id] = C.momentum_z[id]; + dataset_buffer_E[buf_id] = C.Energy[id]; + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0.5 * (C.magnetic_x[id] + C.magnetic_x[id_xm1]); + dataset_buffer_magnetic_y[buf_id] = 0.5 * (C.magnetic_y[id] + C.magnetic_y[id_ym1]); + dataset_buffer_magnetic_z[buf_id] = 0.5 * (C.magnetic_z[id] + C.magnetic_z[id_zm1]); + #endif // MHD + #ifdef DE + dataset_buffer_GE[buf_id] = C.GasEnergy[id]; + #endif + #ifdef SCALAR + for (int ii = 0; ii < NSCALARS; ii++) { + dataset_buffer_scalar[buf_id + ii * H.ny * H.nz] = C.scalar[id + ii * H.n_cells]; + } + #endif + #ifdef MPI_CHOLLA } // if the slice isn't in your domain, just write out zeros else { @@ -2218,32 +2192,41 @@ void Grid3D::Write_Slices_HDF5(hid_t file_id) dataset_buffer_my[buf_id] = 0; dataset_buffer_mz[buf_id] = 0; dataset_buffer_E[buf_id] = 0; - #ifdef DE + #ifdef MHD + dataset_buffer_magnetic_x[buf_id] = 0; + dataset_buffer_magnetic_y[buf_id] = 0; + dataset_buffer_magnetic_z[buf_id] = 0; + #endif // MHD + #ifdef DE dataset_buffer_GE[buf_id] = 0; - #endif - #ifdef SCALAR - for (int ii=0; ii1 && H.ny==1 && H.nz==1) { - + if (H.nx > 1 && H.ny == 1 && H.nz == 1) { id = H.n_ghost; - fread(&(C.density[id]), sizeof(Real), H.nx_real, fp); + fread(&(C.density[id]), sizeof(Real), H.nx_real, fp); fread(&(C.momentum_x[id]), sizeof(Real), 
H.nx_real, fp); fread(&(C.momentum_y[id]), sizeof(Real), H.nx_real, fp); fread(&(C.momentum_z[id]), sizeof(Real), H.nx_real, fp); - fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); - #ifdef DE - fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); - #endif + fread(&(C.Energy[id]), sizeof(Real), H.nx_real, fp); + #ifdef DE + fread(&(C.GasEnergy[id]), sizeof(Real), H.nx_real, fp); + #endif } // 2D case - else if (H.nx>1 && H.ny>1 && H.nz==1) { - for (j=0; j 1 && H.ny > 1 && H.nz == 1) { + for (j = 0; j < H.ny_real; j++) { + id = H.n_ghost + (j + H.n_ghost) * H.nx; fread(&(C.density[id]), sizeof(Real), H.nx_real, fp); } - for (j=0; j1 && H.ny==1 && H.nz==1) { - - // need a dataset buffer to remap fastest index - dataset_buffer = (Real *) malloc(H.nx_real*sizeof(Real)); - - // Open the density dataset - dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the density array to the grid - id = H.n_ghost; - memcpy(&(C.density[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - - - // Open the x momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the x momentum array to the grid - id = H.n_ghost; - memcpy(&(C.momentum_x[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - - - // Open the y momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the y momentum array to the grid - id = H.n_ghost; - memcpy(&(C.momentum_y[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); +/* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */ +void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real *hdf5_buffer, Real *grid_buffer) +{ + // Note: for 1D ny_real and nz_real are not used + // And for 2D nz_real is not used. + // This protects the magnetic case where ny_real/nz_real += 1 - // Open the z momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the z momentum array to the grid - id = H.n_ghost; - memcpy(&(C.momentum_z[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - - - // Open the Energy dataset - dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the Energy array to the grid - id = H.n_ghost; - memcpy(&(C.Energy[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - - - #ifdef DE - // Open the internal energy dataset - dataset_id = H5Dopen(file_id, "/GasEnergy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the internal energy array to the grid - id = H.n_ghost; - memcpy(&(C.GasEnergy[id]), &dataset_buffer[0], H.nx_real*sizeof(Real)); - #endif //DE - - #ifdef SCALAR - for (int s=0; s 1 && ny > 1 && nz > 1) { + for (k = 0; k < nz_real; k++) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx + (k + n_ghost) * nx * ny; + buf_id = k + j * nz_real + i * nz_real * ny_real; + grid_buffer[id] = hdf5_buffer[buf_id]; + } + } } - #endif //SCALAR + return; } // 2D case - if (H.nx>1 && H.ny>1 && H.nz==1) { - - // need a dataset buffer to remap fastest index - dataset_buffer = (Real *) malloc(H.ny_real*H.nx_real*sizeof(Real)); - - - // Open the density dataset - dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the density array to the grid - for (j=0; j 1 && ny > 1 && nz == 1) { + for (j = 0; j < ny_real; j++) { + for (i = 0; i < nx_real; i++) { + id = (i + n_ghost) + (j + n_ghost) * nx; + buf_id = j + i * ny_real; + grid_buffer[id] = hdf5_buffer[buf_id]; } } + return; + } + // 1D case + if (nx > 1 && ny == 1 && nz == 1) { + id = n_ghost; + memcpy(&grid_buffer[id], &hdf5_buffer[0], nx_real * sizeof(Real)); + return; + } +} - // Open the z momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - // Copy the z momentum array to the grid - for (j=0; j1 && H.ny>1 && H.nz>1) { + #endif // SCALAR + // MHD only valid in 3D case + if (H.nx > 1 && H.ny > 1 && H.nz > 1) { // Compute Statistic of Initial data Real mean_l, min_l, max_l; Real mean_g, min_g, max_g; - // need a dataset buffer to remap fastest index - dataset_buffer = (Real *) malloc(H.nz_real*H.ny_real*H.nx_real*sizeof(Real)); - - - // Open the density dataset - dataset_id = H5Dopen(file_id, "/density", H5P_DEFAULT); - // Read the density array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! - status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - - mean_l = 0; - min_l = 1e65; - max_l = -1; - - // Copy the density array to the grid - for (k=0; k max_l ) max_l = C.density[id]; - if ( C.density[id] < min_l ) min_l = C.density[id]; - } - } - } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " Density Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - - // Open the x momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_x", H5P_DEFAULT); - // Read the x momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the x momentum array to the grid - for (k=0; k max_l ) max_l = fabs(C.momentum_x[id]); - if ( fabs(C.momentum_x[id]) < min_l ) min_l = fabs(C.momentum_x[id]); - } - } - } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Momentum X) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - // Open the y momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_y", H5P_DEFAULT); - // Read the y momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the y momentum array to the grid - for (k=0; k max_l ) max_l = fabs(C.momentum_y[id]); - if ( fabs(C.momentum_y[id]) < min_l ) min_l = fabs(C.momentum_y[id]); - } - } - } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Momentum Y) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - - // Open the z momentum dataset - dataset_id = H5Dopen(file_id, "/momentum_z", H5P_DEFAULT); - // Read the z momentum array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! + #ifdef MHD + // Open the x magnetic field dataset + dataset_id = H5Dopen(file_id, "/magnetic_x", H5P_DEFAULT); + // Read the x magnetic field array into the dataset buffer // NOTE: NEED TO + // FIX FOR FLOAT REAL!!! 
status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the z momentum array to the grid - for (k=0; k max_l ) max_l = fabs(C.momentum_z[id]); - if ( fabs(C.momentum_z[id]) < min_l ) min_l = fabs(C.momentum_z[id]); + min_l = 1e65; + max_l = -1; + // Copy the x magnetic field array to the grid + for (k = 0; k < H.nz_real; k++) { + for (j = 0; j < H.ny_real; j++) { + for (i = 0; i < H.nx_real + 1; i++) { + id = (i + H.n_ghost - 1) + (j + H.n_ghost) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * (H.nz_real) + i * (H.nz_real) * (H.ny_real); + C.magnetic_x[id] = dataset_buffer[buf_id]; + + mean_l += std::abs(C.magnetic_x[id]); + max_l = std::max(max_l, std::abs(C.magnetic_x[id])); + min_l = std::min(min_l, std::abs(C.magnetic_x[id])); } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= ((H.nz_real + 1) * (H.ny_real) * (H.nx_real)); #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Momentum Z) Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - - // Open the Energy dataset - dataset_id = H5Dopen(file_id, "/Energy", H5P_DEFAULT); - // Read the Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
+ chprintf( + " abs(Magnetic X) Mean: %f Min: %f Max: %f [ Msun^1/2 " + "kpc^-1/2 s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY + + // Open the y magnetic field dataset + dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT); + // Read the y magnetic field array into the dataset buffer // NOTE: NEED TO + // FIX FOR FLOAT REAL!!! status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the Energy array to the grid - for (k=0; k max_l ) max_l = C.Energy[id]; - if ( C.Energy[id] < min_l ) min_l = C.Energy[id]; + min_l = 1e65; + max_l = -1; + // Copy the y magnetic field array to the grid + for (k = 0; k < H.nz_real; k++) { + for (j = 0; j < H.ny_real + 1; j++) { + for (i = 0; i < H.nx_real; i++) { + id = (i + H.n_ghost) + (j + H.n_ghost - 1) * H.nx + (k + H.n_ghost) * H.nx * H.ny; + buf_id = k + j * (H.nz_real) + i * (H.nz_real) * (H.ny_real + 1); + C.magnetic_y[id] = dataset_buffer[buf_id]; + + mean_l += std::abs(C.magnetic_x[id]); + max_l = std::max(max_l, std::abs(C.magnetic_x[id])); + min_l = std::min(min_l, std::abs(C.magnetic_x[id])); } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= ((H.nz_real) * (H.ny_real + 1) * (H.nx_real)); #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " Energy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 s^-2 ] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - - #ifdef DE - // Open the internal Energy dataset - dataset_id = H5Dopen(file_id, 
"/GasEnergy", H5P_DEFAULT); - // Read the internal Energy array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! + chprintf( + " abs(Magnetic Y) Mean: %f Min: %f Max: %f [ Msun^1/2 " + "kpc^-1/2 s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY + + // Open the z magnetic field dataset + dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT); + // Read the z magnetic field array into the dataset buffer // NOTE: NEED TO + // FIX FOR FLOAT REAL!!! status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); // Free the dataset id status = H5Dclose(dataset_id); - Real temp, temp_max_l, temp_min_l, temp_mean_l; - Real temp_min_g, temp_max_g, temp_mean_g; - temp_mean_l = 0; - temp_min_l = 1e65; - temp_max_l = -1; mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the internal Energy array to the grid - for (k=0; k max_l ) max_l = C.GasEnergy[id]; - if ( C.GasEnergy[id] < min_l ) min_l = C.GasEnergy[id]; - temp = C.GasEnergy[id] / C.density[id] * ( gama - 1 ) * MP / KB * 1e10 ; - temp_mean_l += temp; - // chprintf( "%f\n", temp); - if ( temp > temp_max_l ) temp_max_l = temp; - if ( temp < temp_min_l ) temp_min_l = temp; + min_l = 1e65; + max_l = -1; + // Copy the z magnetic field array to the grid + for (k = 0; k < H.nz_real + 1; k++) { + for (j = 0; j < H.ny_real; j++) { + for (i = 0; i < H.nx_real; i++) { + id = (i + H.n_ghost) + (j + H.n_ghost) * H.nx + (k + H.n_ghost - 1) * H.nx * H.ny; + buf_id = k + j * (H.nz_real + 1) + i * (H.nz_real + 1) * (H.ny_real); + C.magnetic_z[id] = dataset_buffer[buf_id]; + + mean_l += std::abs(C.magnetic_x[id]); + max_l = std::max(max_l, std::abs(C.magnetic_x[id])); + min_l = std::min(min_l, std::abs(C.magnetic_x[id])); } } } - mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); - temp_mean_l /= ( H.nz_real * H.ny_real * H.nx_real ); + mean_l /= ((H.nz_real) * (H.ny_real) * (H.nx_real + 1)); #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = 
ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); + mean_g = ReduceRealAvg(mean_l); + max_g = ReduceRealMax(max_l); + min_g = ReduceRealMin(min_l); mean_l = mean_g; - max_l = max_g; - min_l = min_g; - temp_mean_g = ReduceRealAvg( temp_mean_l ); - temp_max_g = ReduceRealMax( temp_max_l ); - temp_min_g = ReduceRealMin( temp_min_l ); - temp_mean_l = temp_mean_g; - temp_max_l = temp_max_g; - temp_min_l = temp_min_g; - #endif //MPI_CHOLLA + max_l = max_g; + min_l = min_g; + #endif // MPI_CHOLLA #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " GasEnergy Mean: %f Min: %f Max: %f [ h^2 Msun kpc^-3 km^2 s^-2 ] \n", mean_l, min_l, max_l ); - chprintf( " Temperature Mean: %f Min: %f Max: %f [ K ] \n", temp_mean_l, temp_min_l, temp_max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - #endif//DE - - #ifdef SCALAR - #if !defined(COOLING_GRACKLE) && !defined(CHEMISTRY_GPU) // Dont Load scalars when using grackle or CHEMISTRY_GPU - for (int s=0; s max_l ) max_l = fabs(C.magnetic_x[id]); - if ( fabs(C.magnetic_x[id]) < min_l ) min_l = fabs(C.magnetic_x[id]); - } - } - } - mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Magnetic X) Mean: %f Min: %f Max: %f [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - // Open the y magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_y", H5P_DEFAULT); - // Read the y magnetic field array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the y magnetic field array to the grid - for (k=0; k max_l ) max_l = fabs(C.magnetic_y[id]); - if ( fabs(C.magnetic_y[id]) < min_l ) min_l = fabs(C.magnetic_y[id]); - } - } - } - mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Magnetic Y) Mean: %f Min: %f Max: %f [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - - // Open the z magnetic field dataset - dataset_id = H5Dopen(file_id, "/magnetic_z", H5P_DEFAULT); - // Read the z magnetic field array into the dataset buffer // NOTE: NEED TO FIX FOR FLOAT REAL!!! 
- status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, dataset_buffer); - // Free the dataset id - status = H5Dclose(dataset_id); - - mean_l = 0; - min_l = 1e65; - max_l = -1; - // Copy the z magnetic field array to the grid - for (k=0; k max_l ) max_l = fabs(C.magnetic_z[id]); - if ( fabs(C.magnetic_z[id]) < min_l ) min_l = fabs(C.magnetic_z[id]); - } - } - } - mean_l /= ( (H.nz_real+1) * (H.ny_real+1) * (H.nx_real+1) ); - - #if MPI_CHOLLA - mean_g = ReduceRealAvg( mean_l ); - max_g = ReduceRealMax( max_l ); - min_g = ReduceRealMin( min_l ); - mean_l = mean_g; - max_l = max_g; - min_l = min_g; - #endif //MPI_CHOLLA - - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " abs(Magnetic Z) Mean: %f Min: %f Max: %f [ Msun^1/2 kpc^-1/2 s^-1] \n", mean_l, min_l, max_l ); - #endif //PRINT_INITIAL_STATS and COSMOLOGY - #endif //MHD + chprintf( + " abs(Magnetic Z) Mean: %f Min: %f Max: %f [ Msun^1/2 " + "kpc^-1/2 s^-1] \n", + mean_l, min_l, max_l); + #endif // PRINT_INITIAL_STATS and COSMOLOGY + #endif // MHD } free(dataset_buffer); } #endif - - /* MPI-safe printf routine */ -int chprintf(const char * __restrict sdata, ...) +int chprintf(const char *__restrict sdata, ...) 
// NOLINT(cert-dcl50-cpp) { int code = 0; -#ifdef MPI_CHOLLA /*limit printf to root process only*/ - if(procID==root) - { -#endif /*MPI_CHOLLA*/ - - va_list ap; - va_start(ap, sdata); - code = vfprintf(stdout, sdata, ap); - va_end(ap); - fflush(stdout); - -#ifdef MPI_CHOLLA + if (Is_Root_Proc()) { + va_list ap; + va_start(ap, sdata); + code = vfprintf(stdout, sdata, ap); // NOLINT(clang-analyzer-valist.Uninitialized) + va_end(ap); + fflush(stdout); } -#endif /*MPI_CHOLLA*/ return code; } - -void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) { - - Real cd,sd,cp,sp,ct,st; //sines and cosines - Real a00, a01, a02; //rotation matrix elements +void Rotate_Point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real *xp, Real *yp, Real *zp) +{ + Real cd, sd, cp, sp, ct, st; // sines and cosines + Real a00, a01, a02; // rotation matrix elements Real a10, a11, a12; Real a20, a21, a22; - //compute trig functions of rotation angles + // compute trig functions of rotation angles cd = cos(delta); sd = sin(delta); cp = cos(phi); @@ -3327,7 +2734,7 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real ct = cos(theta); st = sin(theta); - //compute the rotation matrix elements + // compute the rotation matrix elements /*a00 = cosp*cosd - sinp*cost*sind; a01 = -1.0*(cosp*sind + sinp*cost*cosd); a02 = sinp*sint; @@ -3339,32 +2746,119 @@ void rotate_point(Real x, Real y, Real z, Real delta, Real phi, Real theta, Real a20 = sint*sind; a21 = sint*cosd; a22 = cost;*/ - a00 = (cp*cd - sp*ct*sd); - a01 = -1.0*(cp*sd+sp*ct*cd); - a02 = sp*st; - a10 = (sp*cd + cp*ct*sd); - a11 = (cp*ct*cd -st*sd); - a12 = cp*st; - a20 = st*sd; - a21 = st*cd; + a00 = (cp * cd - sp * ct * sd); + a01 = -1.0 * (cp * sd + sp * ct * cd); + a02 = sp * st; + a10 = (sp * cd + cp * ct * sd); + a11 = (cp * ct * cd - st * sd); + a12 = cp * st; + a20 = st * sd; + a21 = st * cd; a22 = ct; - *xp = a00*x + a01*y + a02*z; - *yp = 
a10*x + a11*y + a12*z; - *zp = a20*x + a21*y + a22*z; - + *xp = a00 * x + a01 * y + a02 * z; + *yp = a10 * x + a11 * y + a12 * z; + *zp = a20 * x + a21 * y + a22 * z; } -void write_debug ( Real *Value, const char *fname, int nValues, int iProc ) - { +void Write_Debug(Real *Value, const char *fname, int nValues, int iProc) +{ char fn[1024]; int ret; sprintf(fn, "%s_%07d.txt", fname, iProc); FILE *fp = fopen(fn, "w"); - for ( int iV = 0; iV < nValues; iV++ ) + for (int iV = 0; iV < nValues; iV++) { fprintf(fp, "%e\n", Value[iV]); + } + + fclose(fp); +} - fclose (fp); +std::string FnameTemplate::effective_output_dir_path(int nfile) const noexcept +{ + // for consistency, ensure that the returned string always has a trailing "/" + if (outdir_.empty()) { + return "./"; + } else if (separate_cycle_dirs_) { + return this->outdir_ + "/" + std::to_string(nfile) + "/"; + } else { + // if the last character of outdir is not a '/', then the substring of + // characters after the final '/' (or entire string if there isn't any '/') + // is treated as a file-prefix + // + // this is accomplished here: + std::filesystem::path without_file_prefix = std::filesystem::path(this->outdir_).parent_path(); + return without_file_prefix.string() + "/"; } +} + +std::string FnameTemplate::format_fname(int nfile, const std::string &pre_extension_suffix) const noexcept +{ +#ifdef MPI_CHOLLA + int file_proc_id = procID; +#else + int file_proc_id = 0; +#endif + return format_fname(nfile, file_proc_id, pre_extension_suffix); +} + +std::string FnameTemplate::format_fname(int nfile, int file_proc_id, + const std::string &pre_extension_suffix) const noexcept +{ + // get the leading section of the string + const std::string path_prefix = + (separate_cycle_dirs_) + ? 
(effective_output_dir_path(nfile) + "/") // while redundant, the slash signals our intent + : outdir_; + + // get the file extension +#if defined BINARY + const char *extension = ".bin"; +#elif defined HDF5 + const char *extension = ".h5"; +#else + const char *extension = ".txt"; +#endif + + std::string procID_part = "." + std::to_string(file_proc_id); // initialized to empty string + + return path_prefix + std::to_string(nfile) + pre_extension_suffix + extension + procID_part; +} + +void Ensure_Dir_Exists(std::string dir_path) +{ + if (Is_Root_Proc()) { + // if the last character of outdir is not a '/', then the substring of + // characters after the final '/' (or entire string if there isn't any '/') + // is treated as a file-prefix + // + // this is accomplished here: + std::filesystem::path path = std::filesystem::path(dir_path); + + if (!dir_path.empty()) { + // try to create all directories specified within outdir (does nothing if + // the directories already exist) + std::error_code err_code; + std::filesystem::create_directories(path, err_code); + + // confirm that an error-code wasn't set & that the path actually refers + // to a directory (it's unclear from docs whether err-code is set in that + // case) + if (err_code or not std::filesystem::is_directory(path)) { + CHOLLA_ERROR( + "something went wrong while trying to create the path to the " + "directory: %s", + dir_path.c_str()); + } + } + } + + // this barrier ensures we won't ever encounter a scenario when 1 process + // tries to write a file to a non-existent directory before the root process + // has a chance to create it +#ifdef MPI_CHOLLA + MPI_Barrier(world); +#endif +} diff --git a/src/io/io.h b/src/io/io.h index f7dfe6eb7..d8f6ca8ca 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -1,44 +1,138 @@ #pragma once -#include "../global/global.h" -#include "../grid/grid3D.h" +#include #include +#include +#include "../global/global.h" +#include "../grid/grid3D.h" /* Write the data */ -void 
WriteData(Grid3D &G, struct parameters P, int nfile); +void Write_Data(Grid3D& G, struct Parameters P, int nfile); /* Output the grid data to file. */ -void OutputData(Grid3D &G, struct parameters P, int nfile); +void Output_Data(Grid3D& G, struct Parameters P, int nfile); /* Output the grid data to file as 32-bit floats. */ -void OutputFloat32(Grid3D &G, struct parameters P, int nfile); +void Output_Float32(Grid3D& G, struct Parameters P, int nfile); /* Output a projection of the grid data to file. */ -void OutputProjectedData(Grid3D &G, struct parameters P, int nfile); +void Output_Projected_Data(Grid3D& G, struct Parameters P, int nfile); /* Output a rotated projection of the grid data to file. */ -void OutputRotatedProjectedData(Grid3D &G, struct parameters P, int nfile); +void Output_Rotated_Projected_Data(Grid3D& G, struct Parameters P, int nfile); /* Output xy, xz, and yz slices of the grid data to file. */ -void OutputSlices(Grid3D &G, struct parameters P, int nfile); +void Output_Slices(Grid3D& G, struct Parameters P, int nfile); /* MPI-safe printf routine */ -int chprintf(const char * __restrict sdata, ...); +int chprintf(const char* __restrict sdata, ...); + +/*! + * \brief Convert a floating point number to a string such that it can be + * exactly deserialized back from a string to the same floating point number. 
+ * + * \tparam T Any floating point type + * \param[in] input The floating point number to convert + * \return std::string The string representation of the input floating point + */ +template +std::string to_string_exact(T const& input) +{ + std::stringstream output; + output << std::setprecision(std::numeric_limits::max_digits10); + output << input; + return output.str(); +} + +void Create_Log_File(struct Parameters P); + +void Write_Message_To_Log_File(const char* message); + +void Write_Debug(Real* Value, const char* fname, int nValues, int iProc); + +/* Lightweight object designed to centralize the file-naming logic (& any associated configuration). + * + * Cholla pathnames traditionally followed the following template: + * "{outdir}{nfile}{pre_extension_suffix}{extension}.{proc_id}" + * where each curly-braced token represents a different variable. In detail: + * - `{outdir}` is the parameter from the parameter file. The historical behavior (that we currently + * maintain) is that, if this is non-empty, then all characters following the last '/' are treated as a + * prefix to the output file name (if there aren't any '/' characters, then the whole string is + * effectively a prefix). + * - `{nfile}` is the current file-output count. + * - `{pre_extension_suffix}` is the pre-hdf5-extension suffix. It's the suffix that precedes the + * file extension (or `{extension}`) + * - `{extension}` is the filename extension. Examples include ".h5" or ".bin" or ".txt". + * - `{proc_id}` represents the process-id that held the data that will be written to this file. + * Previously, in non-MPI runs, this was omitted. + * + * Instances can be configured to support the following newer file-naming template + * "{outdir}/{nfile}/{nfile}{pre_extension_suffix}{extension}.{proc_id}" + * where the significance of each curly-braced token is largely unchanged. 
There are 2 things + * worth noting: + * - all files written at a single simulation-cycle are now grouped in a single directory + * - `{outdir}` never specifies a file prefix. When `{outdir}` is empty, it is treated as "./". + * Otherwise, we effectively append '/' to the end of `{outdir}` + * + * \note + * This could probably pull double-duty and get reused with infile. + */ +class FnameTemplate +{ + public: + FnameTemplate() = delete; -void Create_Log_File( struct parameters P ); + FnameTemplate(bool separate_cycle_dirs, std::string outdir) + : separate_cycle_dirs_(separate_cycle_dirs), outdir_(std::move(outdir)) + { + } -void Write_Message_To_Log_File( const char* message ); + FnameTemplate(const Parameters& P) : FnameTemplate(not P.legacy_flat_outdir, P.outdir) {} -void write_debug ( Real *Value, const char *fname, int nValues, int iProc ); + /* Specifies whether separate cycles are written to separate directories */ + bool separate_cycle_dirs() const noexcept { return separate_cycle_dirs_; } + + /* Returns the effective output-directory used for outputs at a given simulation-cycle */ + std::string effective_output_dir_path(int nfile) const noexcept; + + /* format the file path */ + std::string format_fname(int nfile, const std::string& pre_extension_suffix) const noexcept; + + std::string format_fname(int nfile, int file_proc_id, const std::string& pre_extension_suffix) const noexcept; + + private: + bool separate_cycle_dirs_; + std::string outdir_; +}; + +/* Checks whether the directories referred to within outdir exist. Creates them + * if they don't. It gracefully handles cases where outdir contains a prefix + * for the output files. 
+ */ +void Ensure_Dir_Exists(std::string dir_path); #ifdef HDF5 // From io/io.cpp -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); -herr_t HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); + +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, double* attribute, const char* name); +herr_t Write_HDF5_Attribute(hid_t file_id, hid_t dataspace_id, int* attribute, const char* name); + +herr_t Read_HDF5_Dataset(hid_t file_id, double* dataset_buffer, const char* name); +herr_t Read_HDF5_Dataset(hid_t file_id, float* dataset_buffer, const char* name); + +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, double* dataset_buffer, const char* name); +herr_t Write_HDF5_Dataset(hid_t file_id, hid_t dataspace_id, float* dataset_buffer, const char* name); + +/* \brief After HDF5 reads data into a buffer, remap and write to grid buffer. */ +void Fill_Grid_From_HDF5_Buffer(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real* hdf5_buffer, Real* grid_buffer); // From io/io_gpu.cu -// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, then write HDF5 field -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* source, const char* name); -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* source, const char* name); +// Use GPU to pack source -> device_buffer, then copy device_buffer -> buffer, +// then write HDF5 field +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + float* buffer, float* device_buffer, Real* source, const char* name, int mhd_direction = -1); +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + 
double* buffer, double* device_buffer, Real* source, const char* name, int mhd_direction = -1); #endif diff --git a/src/io/io_gpu.cu b/src/io/io_gpu.cu index c6cab6e8a..a793ab792 100644 --- a/src/io/io_gpu.cu +++ b/src/io/io_gpu.cu @@ -1,110 +1,182 @@ // Require HDF5 #ifdef HDF5 -#include + #include -#include "../grid/grid3D.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/cuda_utilities.h" -#include "../io/io.h" // To provide io.h with OutputViz3D +// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real +// whereas the conserved variables have size nx,ny,nz. -// Note that the HDF5 file and buffer will have size nx_real * ny_real * nz_real whereas the conserved variables have size nx,ny,nz -// Note that magnetic fields add +1 to nx_real ny_real nz_real since an extra face needs to be output, but also has the same size nx ny nz -// For the magnetic field case, a different nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. +// Note that magnetic fields +// add +1 to nx_real ny_real nz_real since an extra face needs to be output, but +// also has the same size nx ny nz. -// Copy Real (non-ghost) cells from source to a double destination (for writing HDF5 in double precision) -__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, double* destination, Real* source) +// For the magnetic field case, a different +// nx_real+1 ny_real+1 nz_real+1 n_ghost-1 are provided as inputs. + +// 2D version of CopyReal3D_GPU_Kernel. 
Note that magnetic fields and float32 output are not enabled in 2-D so this is a +// simpler kernel +__global__ void CopyReal2D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, + Real* destination, Real* source) { + int const id = threadIdx.x + blockIdx.x * blockDim.x; + + int i, j, k; + cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k); + // i goes up to nx_real + // j goes up to ny_real + // for 2D, k should be 0 + if (k >= 1) { + return; + } - int dest_id,source_id,id,i,j,k; - id = threadIdx.x + blockIdx.x * blockDim.x; + // This converts into HDF5 indexing that plays well with Python + int const dest_id = j + i * ny_real; + int const source_id = (i + n_ghost) + (j + n_ghost) * nx; - k = id/(nx_real*ny_real); - j = (id - k*nx_real*ny_real)/nx_real; - i = id - j*nx_real - k*nx_real*ny_real; + destination[dest_id] = source[source_id]; +} + +// Copy Real (non-ghost) cells from source to a double destination (for writing +// HDF5 in double precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, + double* destination, Real* source, int mhd_direction) +{ + int const id = threadIdx.x + blockIdx.x * blockDim.x; + + int i, j, k; + cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k); if (k >= nz_real) { return; } // This converts into HDF5 indexing that plays well with Python - dest_id = k + j*nz_real + i*ny_real*nz_real; - source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + int const dest_id = k + j * nz_real + i * ny_real * nz_real; + int const source_id = (i + n_ghost - int(mhd_direction == 0)) + (j + n_ghost - int(mhd_direction == 1)) * nx + + (k + n_ghost - int(mhd_direction == 2)) * nx * ny; - destination[dest_id] = (double) source[source_id]; + destination[dest_id] = (double)source[source_id]; } -// Copy Real (non-ghost) cells from source to a float destination (for writing HDF5 in float precision) -__global__ void 
CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, float* destination, Real* source) +// Copy Real (non-ghost) cells from source to a float destination (for writing +// HDF5 in float precision) +__global__ void CopyReal3D_GPU_Kernel(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, + float* destination, Real* source, int mhd_direction) { + int const id = threadIdx.x + blockIdx.x * blockDim.x; - int dest_id,source_id,id,i,j,k; - id = threadIdx.x + blockIdx.x * blockDim.x; - - k = id/(nx_real*ny_real); - j = (id - k*nx_real*ny_real)/nx_real; - i = id - j*nx_real - k*nx_real*ny_real; + int i, j, k; + cuda_utilities::compute3DIndices(id, nx_real, ny_real, i, j, k); if (k >= nz_real) { return; } - // This converts into HDF5 indexing that plays well with Python - dest_id = k + j*nz_real + i*ny_real*nz_real; - source_id = (i+n_ghost) + (j+n_ghost)*nx + (k+n_ghost)*nx*ny; + // This converts into HDF5 indexing that plays well with Python. + // The `int(mhd_direction == NUM)` sections provide appropriate shifts for writing out the magnetic fields since they + // need an extra cell in the same direction as the field + int const dest_id = k + j * nz_real + i * ny_real * nz_real; + int const source_id = (i + n_ghost - int(mhd_direction == 0)) + (j + n_ghost - int(mhd_direction == 1)) * nx + + (k + n_ghost - int(mhd_direction == 2)) * nx * ny; - destination[dest_id] = (float) source[source_id]; + destination[dest_id] = (float)source[source_id]; } -// When buffer is double, automatically use the double version of everything using function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, double* buffer, double* device_buffer, Real* device_source, const char* name) +// When buffer is double, automatically use the double version of everything +// using function overloading +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int 
n_ghost, hid_t file_id, + double* buffer, double* device_buffer, Real* device_source, const char* name, + int mhd_direction) { herr_t status; hsize_t dims[3]; - dims[0] = nx_real; - dims[1] = ny_real; - dims[2] = nz_real; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - //Copy non-ghost parts of source to buffer - dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + // Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); - CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(double), cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_buffer, device_source, mhd_direction); + GPU_Error_Check( + cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(double), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 - status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); status = H5Sclose(dataspace_id); - if (status < 0) {printf("File write failed.\n");} - - + if (status < 0) { + printf("File write failed.\n"); + } } - -// When buffer is float, automatically use the float version of everything using function overloading -void WriteHDF5Field3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, float* buffer, float* device_buffer, Real* device_source, const char* name) +// When buffer is float, automatically use the float version of everything using +// function overloading +void Write_HDF5_Field_3D(int nx, int ny, int nx_real, int ny_real, int nz_real, int n_ghost, hid_t file_id, + float* buffer, float* device_buffer, Real* device_source, 
const char* name, int mhd_direction) { - herr_t status; hsize_t dims[3]; - dims[0] = nx_real; - dims[1] = ny_real; - dims[2] = nz_real; + dims[0] = nx_real; + dims[1] = ny_real; + dims[2] = nz_real; hid_t dataspace_id = H5Screate_simple(3, dims, NULL); - //Copy non-ghost parts of source to buffer - dim3 dim1dGrid((nx_real*ny_real*nz_real+TPB-1)/TPB, 1, 1); + // Copy non-ghost parts of source to buffer + dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); dim3 dim1dBlock(TPB, 1, 1); - hipLaunchKernelGGL(CopyReal3D_GPU_Kernel,dim1dGrid,dim1dBlock,0,0,nx,ny,nx_real,ny_real,nz_real,n_ghost,device_buffer,device_source); - CudaSafeCall(cudaMemcpy( buffer, device_buffer, nx_real*ny_real*nz_real*sizeof(float), cudaMemcpyDeviceToHost)); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_buffer, device_source, mhd_direction); + GPU_Error_Check( + cudaMemcpy(buffer, device_buffer, nx_real * ny_real * nz_real * sizeof(float), cudaMemcpyDeviceToHost)); // Write Buffer to HDF5 - status = HDF5_Dataset(file_id, dataspace_id, buffer, name); + status = Write_HDF5_Dataset(file_id, dataspace_id, buffer, name); status = H5Sclose(dataspace_id); - if (status < 0) {printf("File write failed.\n");} - + if (status < 0) { + printf("File write failed.\n"); + } } +void Fill_HDF5_Buffer_From_Grid_GPU(int nx, int ny, int nz, int nx_real, int ny_real, int nz_real, int n_ghost, + Real* hdf5_buffer, Real* device_hdf5_buffer, Real* device_grid_buffer) +{ + int mhd_direction = -1; + + // 3D case + if (nx > 1 && ny > 1 && nz > 1) { + dim3 dim1dGrid((nx_real * ny_real * nz_real + TPB - 1) / TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal3D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_hdf5_buffer, device_grid_buffer, mhd_direction); + GPU_Error_Check(cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * nz_real * sizeof(Real), + 
cudaMemcpyDeviceToHost)); + return; + } + // 2D case + if (nx > 1 && ny > 1 && nz == 1) { + dim3 dim1dGrid((nx_real * ny_real + TPB - 1) / TPB, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(CopyReal2D_GPU_Kernel, dim1dGrid, dim1dBlock, 0, 0, nx, ny, nx_real, ny_real, nz_real, n_ghost, + device_hdf5_buffer, device_grid_buffer); + GPU_Error_Check( + cudaMemcpy(hdf5_buffer, device_hdf5_buffer, nx_real * ny_real * sizeof(Real), cudaMemcpyDeviceToHost)); + return; + } + + // 1D case + if (nx > 1 && ny == 1 && nz == 1) { + GPU_Error_Check( + cudaMemcpy(hdf5_buffer, device_grid_buffer + n_ghost, nx_real * sizeof(Real), cudaMemcpyDeviceToHost)); + return; + } +} -#endif //HDF5 +#endif // HDF5 diff --git a/src/io/io_parallel.cpp b/src/io/io_parallel.cpp new file mode 100644 index 000000000..22257b1fc --- /dev/null +++ b/src/io/io_parallel.cpp @@ -0,0 +1,141 @@ +// Routines for using Parallel HDF5 to read/write from single file +#include "../grid/grid3D.h" +#include "../io/io.h" +#include "../utils/error_handling.h" + +#if defined(HDF5) && defined(MPI_CHOLLA) + #include + + #include "../mpi/mpi_routines.h" + #include "../utils/timing_functions.h" // provides ScopedTimer + +// Warning: H5Sselect_hyperslab expects its pointer args to be arrays of same size as the rank of the dataspace +// file_space_id +void Read_HDF5_Selection_3D(hid_t file_id, hsize_t* offset, hsize_t* count, double* buffer, const char* name) +{ + hid_t dataset_id = H5Dopen(file_id, name, H5P_DEFAULT); + // Select the requested subset of data + hid_t file_space_id = H5Dget_space(dataset_id); + hid_t mem_space_id = H5Screate_simple(3, count, NULL); + + // Notes on hyperslab call: + + // First NULL is stride, setting to NULL is like setting to 1, contiguous + + // Second NULL is block, setting to NULL sets block size to 1. 
+ + // Count is the number of blocks in each dimension: + + // since our block size is 1, Count is the number of voxels in each dimension + + herr_t status = H5Sselect_hyperslab(file_space_id, H5S_SELECT_SET, offset, NULL, count, NULL); + // Read in the data subset + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, mem_space_id, file_space_id, H5P_DEFAULT, buffer); + + // Free the ids + status = H5Sclose(mem_space_id); + status = H5Sclose(file_space_id); + status = H5Dclose(dataset_id); +} + +// Alwin: I'm only writing a 3D version of this because that's what is practical. +// Read from concatenated HDF5 file +void Read_Grid_Cat_HDF5_Field(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count, + Real* grid_buffer, const char* name) +{ + Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real, H.ny_real, H.nz_real, H.n_ghost, dataset_buffer, grid_buffer); +} + +void Read_Grid_Cat_HDF5_Field_Magnetic(hid_t file_id, Real* dataset_buffer, Header H, hsize_t* offset, hsize_t* count, + Real* grid_buffer, const char* name) +{ + Read_HDF5_Selection_3D(file_id, offset, count, dataset_buffer, name); + Fill_Grid_From_HDF5_Buffer(H.nx, H.ny, H.nz, H.nx_real + 1, H.ny_real + 1, H.nz_real + 1, H.n_ghost - 1, + dataset_buffer, grid_buffer); +} + +/*! \brief Read in grid data from a single concatenated output file. 
*/ +void Grid3D::Read_Grid_Cat(struct Parameters P) +{ + ScopedTimer timer("Read_Grid_Cat"); + herr_t status; + char filename[100]; + + // Build the input file name (indir, then nfile, then ".h5"). snprintf is bounded by the buffer size, so an overly + // long P.indir is truncated (the open below then fails and is reported) instead of overrunning filename. + snprintf(filename, sizeof(filename), "%s%d.h5", P.indir, P.nfile); + + hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + + if (file_id < 0) { + printf("Unable to open input file: %s\n", filename); + exit(-1);  // a missing input file is a failure; exit(0) would report success + } + + // TODO (written by Alwin, for anyone to do) : + // Consider using collective calls if this part is slow at scale + hid_t attribute_id; + attribute_id = H5Aopen(file_id, "t", H5P_DEFAULT); + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &H.t); + status = H5Aclose(attribute_id); + attribute_id = H5Aopen(file_id, "n_step", H5P_DEFAULT); + status = H5Aread(attribute_id, H5T_NATIVE_INT, &H.n_step); + status = H5Aclose(attribute_id); + + // Offsets are global variables from mpi_routines.h + hsize_t offset[3]; + offset[0] = nx_local_start; + offset[1] = ny_local_start; + offset[2] = nz_local_start; + + // This is really dims but I name it count because that's what HDF5 names it + hsize_t count[3]; + count[0] = H.nx_real; + count[1] = H.ny_real; + count[2] = H.nz_real; + + // sizeof(Real) comes first so the element count is multiplied in size_t; the int-only product could overflow for + // large local grids before being widened. + #ifdef MHD + Real* dataset_buffer = (Real*)malloc(sizeof(Real) * (H.nz_real + 1) * (H.ny_real + 1) * (H.nx_real + 1)); + #else + Real* dataset_buffer = (Real*)malloc(sizeof(Real) * (H.nz_real) * (H.ny_real) * (H.nx_real)); + #endif + + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.density, "/density"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_x, "/momentum_x"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_y, "/momentum_y"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.momentum_z, "/momentum_z"); + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.Energy, "/Energy"); + #ifdef DE + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.GasEnergy, "/GasEnergy"); + #endif // DE + + #ifdef SCALAR + 
#ifdef BASIC_SCALAR + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.basic_scalar, "/scalar0"); + #endif + #ifdef DUST + Read_Grid_Cat_HDF5_Field(file_id, dataset_buffer, H, offset, count, C.dust_density, "/dust_density"); + #endif + #endif + // TODO (Alwin) : add scalar stuff + + #ifdef MHD + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_x, "/magnetic_x"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_y, "/magnetic_y"); + Read_Grid_Cat_HDF5_Field_Magnetic(file_id, dataset_buffer, H, offset, count, C.magnetic_z, "/magnetic_z"); + #endif + + free(dataset_buffer); + status = H5Fclose(file_id); +} + +#else + +void Grid3D::Read_Grid_Cat(struct Parameters P) +{ + chprintf("Warning: Read_Grid_Cat does nothing without MPI_CHOLLA and HDF5\n"); + chexit(-1); + return; + // Does nothing without HDF5 and MPI_CHOLLA +} + +#endif diff --git a/src/io/io_tests.cpp b/src/io/io_tests.cpp new file mode 100644 index 000000000..30b43f644 --- /dev/null +++ b/src/io/io_tests.cpp @@ -0,0 +1,45 @@ +/*! 
+ * \file io_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains all the system tests for code in io.h and io.cpp + * + */ + +// External Libraries and Headers +#include + +// Local includes +#include "../io/io.h" +#include "../system_tests/system_tester.h" + +// STL includes +#include +#include + +// ============================================================================= +TEST(tHYDROtMHDReadGridHdf5, RestartSlowWaveExpectCorrectOutput) +{ + // Set parameters: the MPI rank count used by both runs and the output index (nfile) that the second run loads + int const num_ranks = 4; + std::string restart_nfile_str = "0"; + + // Generate the data to read from: launch Cholla with " tout=0.0 outstep=0.0" appended to its launch parameters + system_test::SystemTestRunner initializer(false, true, false); + initializer.numMpiRanks = num_ranks; + initializer.chollaLaunchParams.append(" tout=0.0 outstep=0.0"); + initializer.launchCholla(); + std::string const read_directory = initializer.getOutputDirectory() + "/" + restart_nfile_str + "/"; + + // Reload data and run the test: second run uses init=Read_Grid with indir set to the first run's output directory + system_test::SystemTestRunner loadRun(false, true, false); + loadRun.numMpiRanks = num_ranks; + loadRun.chollaLaunchParams.append(" init=Read_Grid nfile=" + restart_nfile_str + " indir=" + read_directory); + +#ifdef MHD + loadRun.setFiducialNumTimeSteps(854); +#else // not MHD + loadRun.setFiducialNumTimeSteps(427); +#endif // MHD + loadRun.runL1ErrorTest(4.2E-7, 5.4E-7); +} +// ============================================================================= \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 0df8bcfb4..758b9f54f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,51 +2,69 @@ * \brief Program to run the grid code. 
*/ #ifdef MPI_CHOLLA -#include -#include "mpi/mpi_routines.h" + #include + + #include "mpi/mpi_routines.h" #endif +#include #include #include -#include #include + #include "global/global.h" #include "grid/grid3D.h" #include "io/io.h" +#include "utils/cuda_utilities.h" #include "utils/error_handling.h" +#ifdef SUPERNOVA + #include "particles/supernova.h" + #ifdef ANALYSIS + #include "analysis/feedback_analysis.h" + #endif +#endif // SUPERNOVA +#ifdef STAR_FORMATION + #include "particles/star_formation.h" +#endif +#ifdef MHD + #include "mhd/magnetic_divergence.h" +#endif // MHD + +#include "grid/grid_enum.h" int main(int argc, char *argv[]) { // timing variables double start_total, stop_total, start_step, stop_step; - #ifdef CPU_TIME +#ifdef CPU_TIME double stop_init, init_min, init_max, init_avg; double start_bound, stop_bound, bound_min, bound_max, bound_avg; double start_hydro, stop_hydro, hydro_min, hydro_max, hydro_avg; double init, bound, hydro; init = bound = hydro = 0; - #endif //CPU_TIME +#endif // CPU_TIME // start the total time - start_total = get_time(); + start_total = Get_Time(); +#ifdef MPI_CHOLLA /* Initialize MPI communication */ - #ifdef MPI_CHOLLA InitializeChollaMPI(&argc, &argv); - #endif /*MPI_CHOLLA*/ +#else + // Initialize subset of global parallelism variables usually managed by MPI + Init_Global_Parallel_Vars_No_MPI(); +#endif /*MPI_CHOLLA*/ - Real dti = 0; // inverse time step, 1.0 / dt + Real dti = 0; // inverse time step, 1.0 / dt // input parameter variables char *param_file; - struct parameters P; - int nfile = 0; // number of output files - Real outtime = 0; // current output time - + struct Parameters P; + int nfile = 0; // number of output files + Real outtime = 0; // current output time // read in command line arguments - if (argc < 2) - { + if (argc < 2) { chprintf("usage: %s \n", argv[0]); chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); @@ -59,118 +77,155 @@ int main(int argc, char 
*argv[]) Grid3D G; // read in the parameters - parse_params (param_file, &P, argc, argv); + Parse_Params(param_file, &P, argc, argv); // and output to screen chprintf("Git Commit Hash = %s\n", GIT_HASH); chprintf("Macro Flags = %s\n", MACRO_FLAGS); - chprintf ("Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, boundaries = %d %d %d %d %d %d\n", - P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); - if (strcmp(P.init, "Read_Grid") == 0 ) chprintf ("Input directory: %s\n", P.indir); - chprintf ("Output directory: %s\n", P.outdir); + chprintf( + "Parameter values: nx = %d, ny = %d, nz = %d, tout = %f, init = %s, " + "boundaries = %d %d %d %d %d %d\n", + P.nx, P.ny, P.nz, P.tout, P.init, P.xl_bcnd, P.xu_bcnd, P.yl_bcnd, P.yu_bcnd, P.zl_bcnd, P.zu_bcnd); - //Create a Log file to output run-time messages and output the git hash and - //macro flags used + bool is_restart = false; + if (strcmp(P.init, "Read_Grid") == 0) { + is_restart = true; + } + if (strcmp(P.init, "Read_Grid_Cat") == 0) { + is_restart = true; + } + + if (is_restart) { + chprintf("Input directory: %s\n", P.indir); + } + chprintf("Output directory: %s\n", P.outdir); + + // Check the configuration + Check_Configuration(P); + + // Create a Log file to output run-time messages and output the git hash and + // macro flags used Create_Log_File(P); std::string message = "Git Commit Hash = " + std::string(GIT_HASH); - Write_Message_To_Log_File( message.c_str() ); + Write_Message_To_Log_File(message.c_str()); message = "Macro Flags = " + std::string(MACRO_FLAGS); - Write_Message_To_Log_File( message.c_str() ); - - + Write_Message_To_Log_File(message.c_str()); // initialize the grid G.Initialize(&P); chprintf("Local number of grid cells: %d %d %d %d\n", G.H.nx_real, G.H.ny_real, G.H.nz_real, G.H.n_cells); message = "Initializing Simulation"; - Write_Message_To_Log_File( message.c_str() ); + Write_Message_To_Log_File(message.c_str()); - // Set 
initial conditions and calculate first dt + // Set initial conditions chprintf("Setting initial conditions...\n"); G.Set_Initial_Conditions(P); chprintf("Initial conditions set.\n"); - // set main variables for Read_Grid initial conditions - if (strcmp(P.init, "Read_Grid") == 0) { - dti = C_cfl / G.H.dt; + // set main variables for Read_Grid and Read_Grid_Cat initial conditions + if (is_restart) { outtime += G.H.t; nfile = P.nfile; } - #ifdef DE - chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2 ); - message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); - Write_Message_To_Log_File( message.c_str() ); - #endif +#ifdef DE + chprintf("\nUsing Dual Energy Formalism:\n eta_1: %0.3f eta_2: %0.4f\n", DE_ETA_1, DE_ETA_2); + message = " eta_1: " + std::to_string(DE_ETA_1) + " eta_2: " + std::to_string(DE_ETA_2); + Write_Message_To_Log_File(message.c_str()); +#endif - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.Initialize(); - #endif +#endif - #ifdef GRAVITY +#ifdef GRAVITY G.Initialize_Gravity(&P); - #endif +#endif - #ifdef PARTICLES +#ifdef PARTICLES G.Initialize_Particles(&P); - #endif +#endif - #ifdef COSMOLOGY +#ifdef COSMOLOGY G.Initialize_Cosmology(&P); - #endif +#endif - #ifdef COOLING_GRACKLE +#ifdef COOLING_GRACKLE G.Initialize_Grackle(&P); - #endif +#endif - #ifdef CHEMISTRY_GPU +#ifdef CHEMISTRY_GPU G.Initialize_Chemistry(&P); - #endif +#endif - #ifdef ANALYSIS - G.Initialize_Analysis_Module(&P); - if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); - #endif +#ifdef ANALYSIS + G.Initialize_AnalysisModule(&P); + if (G.Analysis.Output_Now) { + G.Compute_and_Output_Analysis(&P); + } +#endif + +#if defined(SUPERNOVA) && defined(PARTICLE_AGE) + FeedbackAnalysis sn_analysis(G); + #ifdef MPI_CHOLLA + supernova::initState(&P, G.Particles.n_total_initial); + #else + supernova::initState(&P, G.Particles.n_local); + #endif // MPI_CHOLLA +#endif // SUPERNOVA && PARTICLE_AGE - #ifdef 
GRAVITY +#ifdef STAR_FORMATION + star_formation::Initialize(G); +#endif + +#ifdef GRAVITY_ANALYTIC_COMP + G.Setup_Analytic_Potential(&P); +#endif + +#ifdef GRAVITY // Get the gravitational potential for the first timestep - G.Compute_Gravitational_Potential( &P); - #endif + G.Compute_Gravitational_Potential(&P); +#endif - // Set boundary conditions (assign appropriate values to ghost cells) for hydro and potential + // Set boundary conditions (assign appropriate values to ghost cells) for + // hydro and potential chprintf("Setting boundary conditions...\n"); G.Set_Boundary_Conditions_Grid(P); chprintf("Boundary conditions set.\n"); - #ifdef GRAVITY_ANALYTIC_COMP - // add analytic component to gravity potential. - G.Add_Analytic_Potential(&P); - #endif +#ifdef GRAVITY_ANALYTIC_COMP + G.Add_Analytic_Potential(); +#endif - #ifdef PARTICLES +#ifdef PARTICLES // Get the particles acceleration for the first timestep G.Get_Particles_Acceleration(); - #endif +#endif chprintf("Dimensions of each cell: dx = %f dy = %f dz = %f\n", G.H.dx, G.H.dy, G.H.dz); - chprintf("Ratio of specific heats gamma = %f\n",gama); - chprintf("Nstep = %d Timestep = %f Simulation time = %f\n", G.H.n_step, G.H.dt, G.H.t); - + chprintf("Ratio of specific heats gamma = %f\n", gama); + chprintf("Nstep = %d Simulation time = %f\n", G.H.n_step, G.H.t); - #ifdef OUTPUT - if (strcmp(P.init, "Read_Grid") != 0 || G.H.Output_Now ) { +#ifdef OUTPUT + if (!is_restart || G.H.Output_Now) { // write the initial conditions to file chprintf("Writing initial conditions to file...\n"); - WriteData(G, P, nfile); + Write_Data(G, P, nfile); } // add one to the output file count nfile++; - #endif //OUTPUT +#endif // OUTPUT + +#ifdef MHD + // Check that the initial magnetic field has zero divergence + mhd::checkMagneticDivergence(G); +#endif // MHD + // increment the next output time outtime += P.outstep; - #ifdef CPU_TIME - stop_init = get_time(); - init = stop_init - start_total; +#ifdef CPU_TIME + stop_init = 
Get_Time(); + init = stop_init - start_total; #ifdef MPI_CHOLLA init_min = ReduceRealMin(init); init_max = ReduceRealMax(init); @@ -178,34 +233,44 @@ int main(int argc, char *argv[]) chprintf("Init min: %9.4f max: %9.4f avg: %9.4f\n", init_min, init_max, init_avg); #else printf("Init %9.4f\n", init); - #endif //MPI_CHOLLA - #endif //CPU_TIME + #endif // MPI_CHOLLA +#endif // CPU_TIME // Evolve the grid, one timestep at a time chprintf("Starting calculations.\n"); message = "Starting calculations."; - Write_Message_To_Log_File( message.c_str() ); - while (G.H.t < P.tout) - { - // get the start time - #ifdef CPU_TIME + Write_Message_To_Log_File(message.c_str()); + + // Compute inverse timestep for the first time + dti = G.Calc_Inverse_Timestep(); + + while (G.H.t < P.tout) { +// get the start time +#ifdef CPU_TIME G.Timer.Total.Start(); - #endif //CPU_TIME - start_step = get_time(); +#endif // CPU_TIME + start_step = Get_Time(); - // calculate the timestep. Note: this computes the timestep ONLY on the - // first loop, on subsequent time steps it just calls the MPI_Allreduce to - // determine the global timestep + // calculate the timestep by calling MPI_Allreduce G.set_dt(dti); - if (G.H.t + G.H.dt > outtime) G.H.dt = outtime - G.H.t; + // adjust timestep based on the next available scheduled time + const Real next_scheduled_time = fmin(outtime, P.tout); + if (G.H.t + G.H.dt > next_scheduled_time) { + G.H.dt = next_scheduled_time - G.H.t; + } + +#if defined(SUPERNOVA) && defined(PARTICLE_AGE) + supernova::Cluster_Feedback(G, sn_analysis); +#endif // SUPERNOVA && PARTICLE_AGE - #ifdef PARTICLES - //Advance the particles KDK( first step ): Velocities are updated by 0.5*dt and positions are updated by dt - G.Advance_Particles( 1 ); - //Transfer the particles that moved outside the local domain +#ifdef PARTICLES + // Advance the particles KDK( first step ): Velocities are updated by 0.5*dt + // and positions are updated by dt + G.Advance_Particles(1); + // Transfer the 
particles that moved outside the local domain G.Transfer_Particles_Boundaries(P); - #endif +#endif // Advance the grid by one timestep dti = G.Update_Hydro_Grid(); @@ -213,112 +278,120 @@ int main(int argc, char *argv[]) // update the simulation time ( t += dt ) G.Update_Time(); - - #ifdef GRAVITY - //Compute Gravitational potential for next step - G.Compute_Gravitational_Potential( &P); - #endif +#ifdef GRAVITY + // Compute Gravitational potential for next step + G.Compute_Gravitational_Potential(&P); +#endif // add one to the timestep count G.H.n_step++; - //Set the Grid boundary conditions for next time step + // Set the Grid boundary conditions for next time step G.Set_Boundary_Conditions_Grid(P); - #ifdef GRAVITY_ANALYTIC_COMP - // add analytic component to gravity potential. - G.Add_Analytic_Potential(&P); - #endif +#ifdef GRAVITY_ANALYTIC_COMP + G.Add_Analytic_Potential(); +#endif - #ifdef PARTICLES - ///Advance the particles KDK( second step ): Velocities are updated by 0.5*dt using the Accelerations at the new positions - G.Advance_Particles( 2 ); - #endif +#ifdef PARTICLES + /// Advance the particles KDK( second step ): Velocities are updated by + /// 0.5*dt using the Accelerations at the new positions + G.Advance_Particles(2); +#endif - #ifdef PARTICLE_AGE - //G.Cluster_Feedback(); - #endif +#ifdef STAR_FORMATION + star_formation::Star_Formation(G); +#endif - #ifdef CPU_TIME +#ifdef CPU_TIME + cuda_utilities::Print_GPU_Memory_Usage(); G.Timer.Total.End(); - #endif //CPU_TIME +#endif // CPU_TIME - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.Print_Times(); - #endif +#endif // get the time to compute the total timestep - stop_step = get_time(); - stop_total = get_time(); - G.H.t_wall = stop_total-start_total; - #ifdef MPI_CHOLLA + stop_step = Get_Time(); + stop_total = Get_Time(); + G.H.t_wall = stop_total - start_total; +#ifdef MPI_CHOLLA G.H.t_wall = ReduceRealMax(G.H.t_wall); - #endif - chprintf("n_step: %d sim time: %10.7f sim timestep: %7.4e timestep time 
= %9.3f ms total time = %9.4f s\n\n", - G.H.n_step, G.H.t, G.H.dt, (stop_step-start_step)*1000, G.H.t_wall); +#endif + chprintf( + "n_step: %d sim time: %10.7f sim timestep: %7.4e timestep time = " + "%9.3f ms total time = %9.4f s\n\n", + G.H.n_step, G.H.t, G.H.dt, (stop_step - start_step) * 1000, G.H.t_wall); - #ifdef OUTPUT_ALWAYS - G.H.Output_Now = true; - #endif + if (P.output_always) G.H.Output_Now = true; - #ifdef ANALYSIS - if ( G.Analysis.Output_Now ) G.Compute_and_Output_Analysis(&P); - #endif +#ifdef ANALYSIS + if (G.Analysis.Output_Now) { + G.Compute_and_Output_Analysis(&P); + } + #if defined(SUPERNOVA) && defined(PARTICLE_AGE) + sn_analysis.Compute_Gas_Velocity_Dispersion(G); + #endif +#endif - // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0) G.H.Output_Now = true; + // if ( P.n_steps_output > 0 && G.H.n_step % P.n_steps_output == 0) + // G.H.Output_Now = true; - if (G.H.t == outtime || G.H.Output_Now ) - { - #ifdef OUTPUT + if (G.H.t == outtime || G.H.Output_Now) { +#ifdef OUTPUT /*output the grid data*/ - WriteData(G, P, nfile); + Write_Data(G, P, nfile); // add one to the output file count nfile++; - #endif //OUTPUT - // update to the next output time - outtime += P.outstep; +#endif // OUTPUT + if (G.H.t == outtime) { + outtime += P.outstep; // update to the next output time + } } - #ifdef CPU_TIME +#ifdef CPU_TIME G.Timer.n_steps += 1; - #endif +#endif - #ifdef N_STEPS_LIMIT +#ifdef N_STEPS_LIMIT // Exit the loop when reached the limit number of steps (optional) - if ( G.H.n_step == N_STEPS_LIMIT) { - WriteData(G, P, nfile); + if (G.H.n_step == N_STEPS_LIMIT) { + #ifdef OUTPUT + Write_Data(G, P, nfile); + #endif // OUTPUT break; } - #endif - +#endif - #ifdef COSMOLOGY +#ifdef COSMOLOGY // Exit the loop when reached the last scale_factor output - if ( G.Cosmo.exit_now ) { - chprintf( "\nReached Last Cosmological Output: Ending Simulation\n"); + if (G.Cosmo.exit_now) { + chprintf("\nReached Last Cosmological Output: Ending 
Simulation\n"); break; } - #endif - - - } /*end loop over timesteps*/ +#endif +#ifdef MHD + // Check that the magnetic field has zero divergence + mhd::checkMagneticDivergence(G); +#endif // MHD + } /*end loop over timesteps*/ - #ifdef CPU_TIME +#ifdef CPU_TIME // Print timing statistics - G.Timer.Print_Average_Times( P ); - #endif + G.Timer.Print_Average_Times(P); +#endif message = "Simulation completed successfully."; - Write_Message_To_Log_File( message.c_str() ); + Write_Message_To_Log_File(message.c_str()); // free the grid G.Reset(); - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif /*MPI_CHOLLA*/ +#endif /*MPI_CHOLLA*/ return 0; - } diff --git a/src/main_tests.cpp b/src/main_tests.cpp index 29e56b496..4600e190d 100644 --- a/src/main_tests.cpp +++ b/src/main_tests.cpp @@ -6,10 +6,10 @@ */ // STL includes -#include #include -#include #include +#include +#include // External Libraries and Headers #include @@ -18,85 +18,81 @@ #include "utils/testing_utilities.h" /// This is the global variable to store the path to the root of Cholla -testingUtilities::GlobalString globalChollaRoot; -testingUtilities::GlobalString globalChollaBuild; -testingUtilities::GlobalString globalChollaMachine; -testingUtilities::GlobalString globalMpiLauncher; +testing_utilities::GlobalString globalChollaRoot; +testing_utilities::GlobalString globalChollaBuild; +testing_utilities::GlobalString globalChollaMachine; +testing_utilities::GlobalString globalMpiLauncher; bool globalRunCholla; bool globalCompareSystemTestResults; - /*! * \brief Class for parsing input flags. Modified from * https://stackoverflow.com/questions/865668/parsing-command-line-arguments-in-c * */ -class InputParser{ - public: - // ===================================================================== - /*! - * \brief Get the option that follows the given flag. 
Also checks that - * the flag exists and is not empty - * - * \param option The string option to look for - * \return const std::string& The option the follows a given flag - */ - const std::string& getCmdOption(const std::string &option) const - { - // First check that the option exists - if(not cmdOptionExists(option)) - { - std::string errMessage = "Error: argument '" + option + "' not found. "; - throw std::invalid_argument(errMessage); - } +class InputParser +{ + public: + // ===================================================================== + /*! + * \brief Get the option that follows the given flag. Also checks that + * the flag exists and is not empty + * + * \param option The string option to look for + * \return const std::string& The option that follows a given flag + */ + const std::string &Get_Cmd_Option(const std::string &option) const + { + // First check that the option exists + if (not Cmd_Option_Exists(option)) { + std::string errMessage = "Error: argument '" + option + "' not found. "; + throw std::invalid_argument(errMessage); + } - std::vector::const_iterator itr; - itr = std::find(this->_tokens.begin(), this->_tokens.end(), option); - if (itr != this->_tokens.end() && ++itr != this->_tokens.end()) - { - return *itr; - } - else - { - std::string errMessage = "Error: empty argument '" + option + "'"; - throw std::invalid_argument(errMessage); - } - } - // ===================================================================== + std::vector::const_iterator itr; + itr = std::find(this->_tokens.begin(), this->_tokens.end(), option); + if (itr != this->_tokens.end() && ++itr != this->_tokens.end()) { + return *itr; + } else { + std::string errMessage = "Error: empty argument '" + option + "'"; + throw std::invalid_argument(errMessage); + } + } + // ===================================================================== - // ===================================================================== - /*! - * \brief Checks that an option exists. 
Returns True if it exists and - * False otherwise - * - * \param option The option flag to search for - * \return true The option flag exists in argv - * \return false The option flage does not exist in argv - */ - bool cmdOptionExists(const std::string &option) const - { - return std::find(this->_tokens.begin(), this->_tokens.end(), option) - != this->_tokens.end(); - } - // ===================================================================== + // ===================================================================== + /*! + * \brief Checks that an option exists. Returns True if it exists and + * False otherwise + * + * \param option The option flag to search for + * \return true The option flag exists in argv + * \return false The option flag does not exist in argv + */ + bool Cmd_Option_Exists(const std::string &option) const + { + return std::find(this->_tokens.begin(), this->_tokens.end(), option) != this->_tokens.end(); + } + // ===================================================================== - // ===================================================================== - // constructor and destructor - /*! - * \brief Construct a new Input Parser object - * - * \param argc argc from main - * \param argv argv from main - */ - InputParser (int &argc, char **argv) - { - for (int i=1; i < argc; ++i) - this->_tokens.push_back(std::string(argv[i])); - } - ~InputParser() = default; - // ===================================================================== - private: - std::vector _tokens; + // ===================================================================== + // constructor and destructor + /*! 
+ * \brief Construct a new Input Parser object + * + * \param argc argc from main + * \param argv argv from main + */ + InputParser(int &argc, char **argv) + { + for (int i = 1; i < argc; ++i) { + this->_tokens.emplace_back(argv[i]); + } + } + ~InputParser() = default; + // ===================================================================== + private: + std::vector _tokens; }; /*! @@ -111,48 +107,30 @@ class InputParser{ */ int main(int argc, char **argv) { - // First we initialize Googletest. Note, this removes all gtest related - // arguments from argv and argc - ::testing::InitGoogleTest(&argc, argv); - - // Make sure death tests are threadsafe. This is potentially much slower than - // using "fast" instead of "threadsafe" but it makes sure tests are threadsafe - // in a multithreaded environment. If the performance becomes an issue we can - // try "fast", it can also be set on a test by test basis - ::testing::GTEST_FLAG(death_test_style) = "threadsafe"; + // First we initialize Googletest. Note, this removes all gtest related + // arguments from argv and argc + ::testing::InitGoogleTest(&argc, argv); - // Initialize global variables - InputParser input(argc, argv); - globalChollaRoot.init(input.getCmdOption("--cholla-root")); - globalChollaBuild.init(input.getCmdOption("--build-type")); - globalChollaMachine.init(input.getCmdOption("--machine")); - if (input.cmdOptionExists("--mpi-launcher")) - { - globalMpiLauncher.init(input.getCmdOption("--mpi-launcher")); - } - else - { - globalMpiLauncher.init("mpirun -np"); - } + // Make sure death tests are threadsafe. This is potentially much slower than + // using "fast" instead of "threadsafe" but it makes sure tests are threadsafe + // in a multithreaded environment. 
If the performance becomes an issue we can + // try "fast", it can also be set on a test by test basis + ::testing::GTEST_FLAG(death_test_style) = "threadsafe"; - if (input.cmdOptionExists("--runCholla=false")) - { - globalRunCholla = false; - } - else - { - globalRunCholla = true; - } + // Initialize global variables + InputParser input(argc, argv); + globalChollaRoot.init(input.Get_Cmd_Option("--cholla-root")); + globalChollaBuild.init(input.Get_Cmd_Option("--build-type")); + globalChollaMachine.init(input.Get_Cmd_Option("--machine")); + if (input.Cmd_Option_Exists("--mpi-launcher")) { + globalMpiLauncher.init(input.Get_Cmd_Option("--mpi-launcher")); + } else { + globalMpiLauncher.init("mpirun -np"); + } - if (input.cmdOptionExists("--compareSystemTestResults=false")) - { - globalCompareSystemTestResults = false; - } - else - { - globalCompareSystemTestResults = true; - } + globalRunCholla = not input.Cmd_Option_Exists("--runCholla=false"); + globalCompareSystemTestResults = not input.Cmd_Option_Exists("--compareSystemTestResults=false"); - // Run test and return result - return RUN_ALL_TESTS(); + // Run test and return result + return RUN_ALL_TESTS(); } diff --git a/src/mhd/ct_electric_fields.cu b/src/mhd/ct_electric_fields.cu new file mode 100644 index 000000000..f061edeb7 --- /dev/null +++ b/src/mhd/ct_electric_fields.cu @@ -0,0 +1,282 @@ +/*! + * \file ct_electric_fields.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains implementation for the CT electric fields code. 
Method from + * Stone & Gardiner 2009 "A simple unsplit Godunov method for multidimensional + * MHD" hereafter referred to as "S&G 2009" + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../mhd/ct_electric_fields.h" +#ifdef MHD +namespace mhd +{ +// ========================================================================= +__global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *fluxY, Real const *fluxZ, + Real const *dev_conserved, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells) +{ + // get a thread index + int const threadId = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip the first two cells since + // those ghost cells can't be reconstructed + if (xid > 0 and yid > 0 and zid > 0 and xid < nx and yid < ny and zid < nz) { + // According to Stone et al. 2008 section 5.3 and the source code of + // Athena, the following equation relate the magnetic flux to the + // face centered electric fields/EMF. -cross(V,B)x is the negative + // of the x-component of V cross B. Note that "X" is the direction + // the solver is running in this case, not necessarily the true "X". 
+ // F_x[(grid_enum::fluxX_magnetic_z)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Z F_x[(grid_enum::fluxX_magnetic_y)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Y + // F_y[(grid_enum::fluxY_magnetic_x)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_X F_y[(grid_enum::fluxY_magnetic_z)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_Z + // F_z[(grid_enum::fluxZ_magnetic_y)*n_cells] = VxBy - BxVy = + // -(-cross(V,B))z = -EMF_Y F_z[(grid_enum::fluxZ_magnetic_x)*n_cells] = + // VxBz - BxVz = (-cross(V,B))y = EMF_X + + // Notes on Implementation Details + // - The density flux has the same sign as the velocity on the face + // and we only care about the sign so we're using the density flux + // to perform upwinding checks + // - All slopes are computed without the factor of two shown in + // Stone & Gardiner 2008 eqn. 24. That factor of two is taken care + // of in the final assembly of the electric field + + // Variable to get the sign of the velocity at the interface. + Real signUpwind; + + // Slope and face variables. Format is + // "__". Slope/Face indicates if the + // value is a slope or a face centered EMF, direction indicates the + // direction of the derivative/face and pos/neg indicates if it's + // the slope on the positive or negative side of the edge field + // being computed. Note that the direction for the face is parallel + // to the face and the other direction that is parallel to that face + // is the direction of the electric field being calculated + Real slope_x_pos, slope_x_neg, slope_y_pos, slope_y_neg, slope_z_pos, slope_z_neg, face_x_pos, face_x_neg, + face_y_pos, face_y_neg, face_z_pos, face_z_neg; + // ================ + // X electric field + // ================ + + // Y-direction slope on the positive Y side. 
S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_pos = + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Y-direction slope on the negative Y side. S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_neg = + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the positive Z side. 
S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_pos = + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the negative Z side. S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_neg = + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + + face_y_pos = + +fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_y_neg = + +fluxZ[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_x)*n_cells]; + face_z_pos = + -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_x)*n_cells]; + face_z_neg = + -fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid - 1, nx, ny) + 
(grid_enum::fluxY_magnetic_x)*n_cells]; + + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + grid_enum::ct_elec_x * n_cells] = + 0.25 * + (+face_y_pos + face_y_neg + face_z_pos + face_z_neg + slope_y_pos + slope_y_neg + slope_z_pos + slope_z_neg); + + // ================ + // Y electric field + // ================ + + // X-direction slope on the positive X side. S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_pos = + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 2, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the negative X side. S&G 2009 equation 23 + signUpwind = fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_neg = + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the positive Z side. 
S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_pos = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_pos = + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Z-direction slope on the negative Z side. S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_z_neg = mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_z_neg = + 0.5 * (mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxZ, dev_conserved, -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = + -fluxZ[cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_x_neg = + -fluxZ[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + (grid_enum::fluxZ_magnetic_y)*n_cells]; + face_z_pos = + +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + (grid_enum::fluxX_magnetic_y)*n_cells]; + face_z_neg = + +fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid - 1, nx, ny) + 
(grid_enum::fluxX_magnetic_y)*n_cells]; + + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + grid_enum::ct_elec_y * n_cells] = + 0.25 * + (+face_x_pos + face_x_neg + face_z_pos + face_z_neg + slope_x_pos + slope_x_neg + slope_z_pos + slope_z_neg); + + // ================ + // Z electric field + // ================ + + // Y-direction slope on the positive Y side. S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_pos = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_pos = + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Y-direction slope on the negative Y side. S&G 2009 equation 23 + signUpwind = fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_y_neg = mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_y_neg = + 0.5 * (mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxY, dev_conserved, 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the positive X side. 
S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_pos = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_pos = + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // X-direction slope on the negative X side. S&G 2009 equation 23 + signUpwind = fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + grid_enum::density * n_cells]; + if (signUpwind > 0.0) { + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells); + } else if (signUpwind < 0.0) { + slope_x_neg = mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells); + } else { + slope_x_neg = + 0.5 * (mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells) + + mhd::internal::_ctSlope(fluxX, dev_conserved, -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + } + + // Load the face centered electric fields Note the negative signs to + // convert from magnetic flux to electric field + face_x_pos = + +fluxY[cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_x_neg = + +fluxY[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + (grid_enum::fluxY_magnetic_z)*n_cells]; + face_y_pos = + -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny) + (grid_enum::fluxX_magnetic_z)*n_cells]; + face_y_neg = + -fluxX[cuda_utilities::compute1DIndex(xid - 1, yid - 1, zid, nx, ny) + 
(grid_enum::fluxX_magnetic_z)*n_cells]; + + // sum and average face centered electric fields and slopes to get the + // edge averaged electric field. + // S&G 2009 equation 22 + ctElectricFields[threadId + grid_enum::ct_elec_z * n_cells] = + 0.25 * + (+face_x_pos + face_x_neg + face_y_pos + face_y_neg + slope_x_pos + slope_x_neg + slope_y_pos + slope_y_neg); + } +} +// ========================================================================= +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/ct_electric_fields.h b/src/mhd/ct_electric_fields.h new file mode 100644 index 000000000..c151f5bd0 --- /dev/null +++ b/src/mhd/ct_electric_fields.h @@ -0,0 +1,144 @@ +/*! + * \file ct_electric_fields.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration for the kernel that computes the CT electric + * fields. Method from Stone & Gardiner 2009 "A simple unsplit Godunov method + * for multidimensional MHD" hereafter referred to as "S&G 2009" + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" + +#ifdef MHD +namespace mhd +{ +/*! + * \brief Namespace for functions required by functions within the mhd + * namespace. Everything in this namespace should be regarded as private + * but is made accessible for testing + * + */ +namespace internal +{ +// ===================================================================== +/*! + * \brief Compute and return the slope of the electric field used to + * compute the CT electric fields. This function implements S&G 2009 + * equation 24 + * + * \param[in] flux The flux array + * \param[in] dev_conserved The conserved variable array + * \param[in] fluxSign The sign of the flux to convert it to magnetic + * field. Also serves to choose which magnetic flux is used, i.e.
the Y + * or Z field: a value of exactly 1 reads flux field `grid_enum::magnetic_start + 1`, any other value reads + * flux field `grid_enum::magnetic_start` + * \param[in] ctDirection The direction of the CT field that this slope + will be used to calculate; the CT kernel passes 0, 1, or 2 for the X, Y, or Z electric field + * \param[in] conservedQuadrent1 Which index should be reduced by one to get the + correct conserved variables. Options are -1 for no reduction, 0 for reducing + xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] conservedQuadrent2 Which index should be reduced by one to get the + correct conserved variables. Options are -1 for no reduction, 0 for reducing + xid, 1 for reducing yid, and 2 for reducing zid + * \param[in] fluxQuadrent1 Which index should be reduced by one to get the + correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 + for reducing yid, and 2 for reducing zid + * \param[in] fluxQuadrent2 Which index should be reduced by one to get the + correct flux variable. Options are -1 for no reduction, 0 for reducing xid, 1 + for reducing yid, and 2 for reducing zid + * \param[in] xid The x index + * \param[in] yid The y index + * \param[in] zid The z index + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] n_cells The total number of cells + * \return Real The slope of the electric field, i.e. the face centered value minus the cell centered value + */ +inline __host__ __device__ Real _ctSlope(Real const *flux, Real const *dev_conserved, Real const &fluxSign, + int const &ctDirection, int const &conservedQuadrent1, + int const &conservedQuadrent2, int const &fluxQuadrent1, + int const &fluxQuadrent2, int const &xid, int const &yid, int const &zid, + int const &nx, int const &ny, int const &n_cells) +{ + // Compute the various required indices + + // Get the two directions that cyclically follow ctDirection.
+ int const modPlus1 = (ctDirection + 1) % 3; + int const modPlus2 = (ctDirection + 2) % 3; + + // Indices for the cell centered values. Each quadrant argument that names an axis (0, 1, or 2) moves the + // index along that axis down by one; -1 leaves every axis untouched + int const xidCentered = xid - int(conservedQuadrent1 == 0) - int(conservedQuadrent2 == 0); + int const yidCentered = yid - int(conservedQuadrent1 == 1) - int(conservedQuadrent2 == 1); + int const zidCentered = zid - int(conservedQuadrent1 == 2) - int(conservedQuadrent2 == 2); + int const idxCentered = cuda_utilities::compute1DIndex(xidCentered, yidCentered, zidCentered, nx, ny); + + // Index for the flux, shifted from (xid, yid, zid) by the two flux quadrant arguments in the same way + int const idxFlux = cuda_utilities::compute1DIndex(xid - int(fluxQuadrent1 == 0) - int(fluxQuadrent2 == 0), + yid - int(fluxQuadrent1 == 1) - int(fluxQuadrent2 == 1), + zid - int(fluxQuadrent1 == 2) - int(fluxQuadrent2 == 2), nx, ny); + + // Indices for the face centered magnetic fields that need to be averaged. These are the cells one step + // lower than the centered cell along the B2 (modPlus1) and B3 (modPlus2) directions respectively + int const idxB2Shift = cuda_utilities::compute1DIndex( + xidCentered - int(modPlus1 == 0), yidCentered - int(modPlus1 == 1), zidCentered - int(modPlus1 == 2), nx, ny); + int const idxB3Shift = cuda_utilities::compute1DIndex( + xidCentered - int(modPlus2 == 0), yidCentered - int(modPlus2 == 1), zidCentered - int(modPlus2 == 2), nx, ny); + + // Load values for cell centered electric field. B1 (not present) is + // the magnetic field in the same direction as the `ctDirection` + // variable, B2 and B3 are the next two fields cyclically. i.e. if + // B1=Bx then B2=By and B3=Bz, if B1=By then B2=Bz and B3=Bx.
The + // same rules apply to the momentum + Real const density = dev_conserved[idxCentered + grid_enum::density * n_cells]; + Real const Momentum2 = dev_conserved[idxCentered + (modPlus1 + grid_enum::momentum_x) * n_cells]; + Real const Momentum3 = dev_conserved[idxCentered + (modPlus2 + grid_enum::momentum_x) * n_cells]; + Real const B2Centered = 0.5 * (dev_conserved[idxCentered + (modPlus1 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB2Shift + (modPlus1 + grid_enum::magnetic_start) * n_cells]); + Real const B3Centered = 0.5 * (dev_conserved[idxCentered + (modPlus2 + grid_enum::magnetic_start) * n_cells] + + dev_conserved[idxB3Shift + (modPlus2 + grid_enum::magnetic_start) * n_cells]); + + // Compute the electric field in the center with a cross product. The momenta are divided by the density, + // so the density must be non-zero + Real const electric_centered = (Momentum3 * B2Centered - Momentum2 * B3Centered) / density; + + // Load the face centered electric field. fluxSign applies the sign that + // shifts from magnetic flux to EMF/electric field and also chooses + // which flux field is read: exactly 1 reads magnetic_start + 1, anything else reads magnetic_start + Real const electric_face = fluxSign * flux[idxFlux + (int(fluxSign == 1) + grid_enum::magnetic_start) * n_cells]; + + // Compute the slope and return it + // S&G 2009 equation 24 + return electric_face - electric_centered; +} +// ===================================================================== +} // namespace internal + +// ========================================================================= +/*! + * \brief Compute the Constrained Transport electric fields used to evolve + * the magnetic field. Note that this function requires that the density be + * non-zero or it will return NaNs.
+ * + * \param[in] fluxX The flux on the x+1/2 face of each cell + * \param[in] fluxY The flux on the y+1/2 face of each cell + * \param[in] fluxZ The flux on the z+1/2 face of each cell + * \param[in] dev_conserved The device resident grid + * \param[out] ctElectricFields The CT electric fields + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_cells The total number of cells + */ +__global__ void Calculate_CT_Electric_Fields(Real const *fluxX, Real const *fluxY, Real const *fluxZ, + Real const *dev_conserved, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells); +// ========================================================================= +} // end namespace mhd +#endif // MHD \ No newline at end of file diff --git a/src/mhd/ct_electric_fields_tests.cu b/src/mhd/ct_electric_fields_tests.cu new file mode 100644 index 000000000..d3a8ea4dc --- /dev/null +++ b/src/mhd/ct_electric_fields_tests.cu @@ -0,0 +1,274 @@ +/*! + * \file ct_electric_fields_tests.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the CT electric fields + * + */ + +// STL Includes +#include +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../io/io.h" +#include "../mhd/ct_electric_fields.h" +#include "../utils/testing_utilities.h" + +#ifdef MHD +// ============================================================================= +// Tests for the mhd::Calculate_CT_Electric_Fields kernel +// ============================================================================= + +// ============================================================================= +/*! 
+ * \brief Test fixture for the tMHDCalculateCTElectricFields test suite. Owns the host and device arrays for a + * 2x2x2 grid, launches the kernel, and compares the output against the fiducial values set by each test + * + */ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDCalculateCTElectricFields : public ::testing::Test +{ + public: + /*! + * \brief Initialize and allocate all the various required variables and + * arrays. The member initializers are listed in declaration order, which is the order they execute in + * + */ + tMHDCalculateCTElectricFields() + : n_cells(nx * ny * nz), + dimGrid((n_cells + TPB - 1) / TPB, 1, 1), + dimBlock(TPB, 1, 1), + fluxX(n_cells * (grid_enum::num_flux_fields)), + fluxY(n_cells * (grid_enum::num_flux_fields)), + fluxZ(n_cells * (grid_enum::num_flux_fields)), + grid(n_cells * (grid_enum::num_fields)), + testCTElectricFields(n_cells * 3, -999.), + fiducialData(n_cells * 3, -999.) + { + // Allocate device arrays + GPU_Error_Check(cudaMalloc(&dev_fluxX, fluxX.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_fluxY, fluxY.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_fluxZ, fluxZ.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_grid, grid.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_testCTElectricFields, testCTElectricFields.size() * sizeof(double))); + + // Populate the grids with consecutive values: fluxX starts at 0 and each later array continues counting from + // where the previous one stopped.
The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(fluxX), std::end(fluxX), 0.); + std::iota(std::begin(fluxY), std::end(fluxY), fluxX.back() + 1); + std::iota(std::begin(fluxZ), std::end(fluxZ), fluxY.back() + 1); + std::iota(std::begin(grid), std::end(grid), fluxZ.back() + 1); + } + + /*! + * \brief Release the device arrays allocated in the constructor. GoogleTest builds a new fixture object for + * every test, so without this each test would leak its device allocations + * + */ + ~tMHDCalculateCTElectricFields() + { + cudaFree(dev_fluxX); + cudaFree(dev_fluxY); + cudaFree(dev_fluxZ); + cudaFree(dev_grid); + cudaFree(dev_testCTElectricFields); + } + + protected: + // Initialize the test grid and other state variables + size_t const nx = 2, ny = nx, nz = nx; + size_t const n_cells; + + // Launch Parameters + dim3 const dimGrid; // How many blocks in the grid + dim3 const dimBlock; // How many threads per block + + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled. The test output and the fiducial data are both + // pre-filled with -999 so that any element the kernel leaves untouched still matches its fiducial value + std::vector fluxX; + std::vector fluxY; + std::vector fluxZ; + std::vector grid; + std::vector testCTElectricFields; + std::vector fiducialData; + + // device pointers + double *dev_fluxX, *dev_fluxY, *dev_fluxZ, *dev_grid, *dev_testCTElectricFields; + + /*!
+ * \brief Launch the kernel and check results. Every element of the output is compared with the matching + * element of fiducialData + * + */ + void Run_Test() + { + // Copy values to GPU + GPU_Error_Check(cudaMemcpy(dev_fluxX, fluxX.data(), fluxX.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_fluxY, fluxY.data(), fluxY.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_fluxZ, fluxZ.data(), fluxZ.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_grid, grid.data(), grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_testCTElectricFields, testCTElectricFields.data(), + testCTElectricFields.size() * sizeof(Real), cudaMemcpyHostToDevice)); + + // Call the kernel to test + hipLaunchKernelGGL(mhd::Calculate_CT_Electric_Fields, dimGrid, dimBlock, 0, 0, dev_fluxX, dev_fluxY, dev_fluxZ, + dev_grid, dev_testCTElectricFields, nx, ny, nz, n_cells); + GPU_Error_Check(); + + // Copy test data back + GPU_Error_Check(cudaMemcpy(testCTElectricFields.data(), dev_testCTElectricFields, + testCTElectricFields.size() * sizeof(Real), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + // Check the results + for (size_t i = 0; i < fiducialData.size(); i++) { + // Recover the cell coordinates of element i for the failure message. The three electric field components + // are stored back to back, so the modulo on zid folds each component onto the same grid + int const xid = i % nx, yid = (i / nx) % ny, zid = (i / (nx * ny)) % nz; + testing_utilities::Check_Results(fiducialData.at(i), testCTElectricFields.at(i), + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + } + } +}; +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDCalculateCTElectricFields, PositiveVelocityExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(7) = 60.951467108788492; + fiducialData.at(15) = -98.736587665919359; + fiducialData.at(23) = 61.768055665002557; + + // Launch kernel and check results + Run_Test(); +} +//
============================================================================= + +// ============================================================================= +TEST_F(tMHDCalculateCTElectricFields, NegativeVelocityExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(7) = 59.978246483260179; + fiducialData.at(15) = -97.279949010457187; + fiducialData.at(23) = 61.280813140085613; + + // Set the density fluxes to be negative to indicate a negative velocity + // across the face + for (size_t i = 0; i < n_cells; i++) { + fluxX.at(i) = -fluxX.at(i); + fluxY.at(i) = -fluxY.at(i); + fluxZ.at(i) = -fluxZ.at(i); + } + + // Launch kernel and check results + Run_Test(); +} +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDCalculateCTElectricFields, ZeroVelocityExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(7) = 60.464856796024335; + fiducialData.at(15) = -98.008268338188287; + fiducialData.at(23) = 61.524434402544081; + + // Set the density fluxes to zero to indicate zero velocity + // across the face + for (size_t i = 0; i < n_cells; i++) { + fluxX.at(i) = 0.0; + fluxY.at(i) = 0.0; + fluxZ.at(i) = 0.0; + } + + // Launch kernel and check results + Run_Test(); +} +// ============================================================================= + +// ============================================================================= +TEST(tMHDCTSlope, CorrectInputExpectCorrectOutput) +{ + // Set up the basic parameters + size_t const nx = 5; + size_t const ny = nx; + size_t const nz = nx; + int const xid = nx / 2; + int const yid = ny / 2; + int const zid = nz / 2; + size_t const n_cells = nx * ny * nz; + + // Set up the grid + std::vector flux(grid_enum::num_fields * n_cells), conserved(grid_enum::num_fields * n_cells); + + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(-5, 5); + + for (double&
conserved_data : conserved) { + conserved_data = doubleRand(prng); + } + for (double& flux_data : flux) { + flux_data = doubleRand(prng); + } + + // Fiducial data + std::vector fiducial_data = { + -6.8725060451062561, -77.056763568617669, 1.4564238051915397, 5.4541656143291437, -0.83503550003671911, + -78.091781647940934, -2.6187125848387525, -5.6934594000939542, -16.243259069749971, -59.321631150095314, + 0.99291378610068892, 4.4004574252725384, -1.6902722376320516, -63.074645759822637, -4.5776373499662899, + -19.476095152639683, -2.0173881091784471, -74.484407919605786, -7.8184484634991724, -0.23206265131850434, + 0.41622472388590037, -74.479121547383727, -6.9903417764222358, -1.832282425083853}; + + // Get test data. Only test the options that will be used + std::vector test_data; + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 2, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 0, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, -1, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 1, 2, 1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 0, 2, -1, -1, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 2, -1, 0, 2, xid, 
yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 1, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, -1, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, -1, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 0, 2, 0, 2, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 1, 2, -1, 2, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, -1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), 1, 2, 1, -1, 1, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 1, -1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, -1, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, 1, 0, 1, xid, yid, zid, nx, ny, n_cells)); + test_data.emplace_back( + 
mhd::internal::_ctSlope(flux.data(), conserved.data(), -1, 2, 0, -1, 0, -1, xid, yid, zid, nx, ny, n_cells)); + + // Check the results + ASSERT_EQ(test_data.size(), fiducial_data.size()); + + for (size_t i = 0; i < test_data.size(); i++) { + testing_utilities::Check_Results(fiducial_data.at(i), test_data.at(i), ""); + } +} +// ============================================================================= +#endif // MHD diff --git a/src/mhd/magnetic_divergence.cu b/src/mhd/magnetic_divergence.cu new file mode 100644 index 000000000..f49e04218 --- /dev/null +++ b/src/mhd/magnetic_divergence.cu @@ -0,0 +1,126 @@ +/*! + * \file magnetic_divergence.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the implementation of various utility functions for MHD and + * for the various kernels, functions, and tools required for the 3D VL+CT MHD + * integrator. Due to the CUDA/HIP compiler requiring that device functions be + * directly accessible to the file they're used in most device functions will be + * implemented in the header file. Uses the same method described in Stone et + * al. 2008 "ATHENA: A new code for astrophysical MHD", hereafter referred to as + * Stone et al.
2008 + * + */ + +// STL Includes +#include +#include + +// External Includes + +// Local Includes +#include "../grid/grid3D.h" +#include "../io/io.h" +#include "../mhd/magnetic_divergence.h" +#include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" +#include "../utils/error_handling.h" +#include "../utils/reduction_utilities.h" +#ifdef MHD + +namespace mhd +{ +// ========================================================================= +__global__ void calculateMagneticDivergence(Real const *dev_conserved, Real *dev_maxDivergence, Real const dx, + Real const dy, Real const dz, int const nx, int const ny, int const nz, + int const n_cells) +{ + // Variables to store the divergence. The running maximum starts at zero and only absolute values are + // folded into it, so it can never become negative + Real cellDivergence; + Real maxDivergence = 0.0; + + // Index variables + int xid, yid, zid, id_xMin1, id_yMin1, id_zMin1; + + // Grid stride loop to perform as much of the reduction as possible + for (size_t id = threadIdx.x + blockIdx.x * blockDim.x; id < n_cells; id += blockDim.x * gridDim.x) { + // Convert the 1D cell index into 3D indices + cuda_utilities::compute3DIndices(id, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip ghost cells that cannot + // have their divergences computed due to a missing face. NOTE(review): cells with an index of 1 are skipped + // as well even though only index 0 lacks a lower neighbor; confirm that this is intended + if (xid > 1 and yid > 1 and zid > 1 and xid < nx and yid < ny and zid < nz) { + // Compute the various offset indices + id_xMin1 = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); + id_yMin1 = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny); + id_zMin1 = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny); + + // Compute divergence + // Stone et al.
2008 equation 25 + cellDivergence = ((dev_conserved[id + (grid_enum::magnetic_x)*n_cells] - + dev_conserved[id_xMin1 + (grid_enum::magnetic_x)*n_cells]) / + dx) + + ((dev_conserved[id + (grid_enum::magnetic_y)*n_cells] - + dev_conserved[id_yMin1 + (grid_enum::magnetic_y)*n_cells]) / + dy) + + ((dev_conserved[id + (grid_enum::magnetic_z)*n_cells] - + dev_conserved[id_zMin1 + (grid_enum::magnetic_z)*n_cells]) / + dz); + + maxDivergence = max(maxDivergence, fabs(cellDivergence)); + } + } + + // Perform reduction across the entire grid + reduction_utilities::gridReduceMax(maxDivergence, dev_maxDivergence); +} +// ========================================================================= + +// ============================================================================= +Real checkMagneticDivergence(Grid3D const &G) +{ + // Compute the local value of the divergence + // First let's create some variables we'll need. Both are function-local statics, so they are created on the + // first call and reused by every later call + cuda_utilities::AutomaticLaunchParams static const launchParams(mhd::calculateMagneticDivergence); + cuda_utilities::DeviceVector static dev_maxDivergence(1); + + // Set the device side divergence to the smallest possible double so that + // the reduction isn't using the maximum value of the previous iteration + dev_maxDivergence.assign(std::numeric_limits::lowest()); + + // Now let's get the local maximum divergence + hipLaunchKernelGGL(mhd::calculateMagneticDivergence, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + G.C.device, dev_maxDivergence.data(), G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, G.H.nz, G.H.n_cells); + GPU_Error_Check(); + Real max_magnetic_divergence = dev_maxDivergence[0]; + + #ifdef MPI_CHOLLA + // Now that we have the local maximum let's get the global maximum + max_magnetic_divergence = ReduceRealMax(max_magnetic_divergence); + #endif // MPI_CHOLLA + + // If the magnetic divergence is greater than the limit then report an error and exit.
+ // This maximum value of divergence was chosen after a discussion with Chris White of the Flatiron institute and an + // Athena dev. He said that in his experience issues start showing up at around 1E-8 divergence so this is set with an + // order of magnitude margin. + Real static const magnetic_divergence_limit = 1.0E-9; + if (max_magnetic_divergence > magnetic_divergence_limit) { + // Report the error and exit + chprintf( + "The magnetic divergence has exceeded the maximum allowed value. " + "Divergence = %7.4e, the maximum allowed divergence = %7.4e\n", + max_magnetic_divergence, magnetic_divergence_limit); + chexit(-1); + } else if (max_magnetic_divergence < 0.0) { + // Report the error and exit + chprintf("The magnetic divergence is negative. Divergence = %7.4e\n", max_magnetic_divergence); + chexit(-1); + } else // The magnetic divergence is within acceptable bounds + { + chprintf("Global maximum magnetic divergence = %7.4e\n", max_magnetic_divergence); + } + + return max_magnetic_divergence; +} +// ============================================================================= +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/magnetic_divergence.h b/src/mhd/magnetic_divergence.h new file mode 100644 index 000000000..3833692c6 --- /dev/null +++ b/src/mhd/magnetic_divergence.h @@ -0,0 +1,63 @@ +/*! + * \file magnetic_divergence.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration for the functions that compute the magnetic + * divergence + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../grid/grid3D.h" +#include "../utils/gpu.hpp" + +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ +// ========================================================================= +/*! + * \brief Kernel to compute the maximum divergence of the magnetic field in + * the grid.
Uses `reduction_utilities::gridReduceMax` and as such should be + * called with the minimum number of blocks. Recommend using the occupancy + * API + * + * \param[in] dev_conserved The device array of conserved variables + * \param[out] maxDivergence The device scalar to store the reduced divergence at + * \param[in] dx Cell size in the X-direction + * \param[in] dy Cell size in the Y-direction + * \param[in] dz Cell size in the Z-direction + * \param[in] nx Number of cells in the X-direction + * \param[in] ny Number of cells in the Y-direction + * \param[in] nz Number of cells in the Z-direction + * \param[in] n_cells Total number of cells + */ +__global__ void calculateMagneticDivergence(Real const *dev_conserved, Real *maxDivergence, Real const dx, + Real const dy, Real const dz, int const nx, int const ny, int const nz, + int const n_cells); +// ========================================================================= + +// ========================================================================= +/*! + * \brief Compute the maximum magnetic divergence in the grid, then report + * an error and exit if it exceeds the magnetic divergence limit or is negative. The + * magnetic divergence limit is 1E-9, one order of magnitude below the ~1E-8 + * level at which issues were reported to start showing up. + * + * \param G The grid object + * \return Real The maximum magnetic divergence found in the grid. Can + * usually be ignored since all checking is done in the function, mostly + * this return is for testing. + */ +Real checkMagneticDivergence(Grid3D const &G); +// ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_divergence_tests.cu b/src/mhd/magnetic_divergence_tests.cu new file mode 100644 index 000000000..7d7b35294 --- /dev/null +++ b/src/mhd/magnetic_divergence_tests.cu @@ -0,0 +1,69 @@ +/*!
+ * \file magnetic_divergence_tests.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the magnetic divergence code + * + */ + +// STL Includes +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../mhd/magnetic_divergence.h" +#include "../utils/DeviceVector.h" +#include "../utils/testing_utilities.h" + +#ifdef MHD +// ============================================================================= +// Tests for the magnetic field divergence functions +// ============================================================================= +TEST(tMHDGrid3DcheckMagneticDivergence, CorrectInputExpectCorrectOutput) +{ + // Grid Parameters & testing parameters + size_t const gridSize = 96; // Needs to be at least 64 so that each thread has a value + size_t const n_ghost = 4; + + // Instantiate Grid3D object + Grid3D G; + G.H.dx = 3; + G.H.dy = G.H.dx; + G.H.dz = G.H.dx; + G.H.nx = gridSize + 2 * n_ghost; + G.H.ny = G.H.nx; + G.H.nz = G.H.nx; + G.H.n_cells = G.H.nx * G.H.ny * G.H.nz; + G.H.n_fields = 8; + + // Setup host grid. 
Fill host grid with random values and randomly assign + // maximum value + std::vector host_grid(G.H.n_cells * G.H.n_fields); + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(1, 5); + for (double& host_data : host_grid) { + host_data = doubleRand(prng) / 1E15; + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + G.C.device = dev_grid.data(); + dev_grid.cpyHostToDevice(host_grid); + + // Perform test + InitializeChollaMPI(NULL, NULL); + double max_magnetic_divergence = mhd::checkMagneticDivergence(G); + MPI_Finalize(); + // Perform Comparison + Real const fiducialDivergence = 3.6318132783263106 / 1E15; + testing_utilities::Check_Results(fiducialDivergence, max_magnetic_divergence, "maximum divergence"); +} +// ============================================================================= +// End of tests for the magnetic field divergence functions +// ============================================================================= +#endif // MHD diff --git a/src/mhd/magnetic_update.cu b/src/mhd/magnetic_update.cu new file mode 100644 index 000000000..acfd44982 --- /dev/null +++ b/src/mhd/magnetic_update.cu @@ -0,0 +1,84 @@ +/*! + * \file magnetic_update.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the definition of the kernel to update the magnetic field. 
+ * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for + * multidimensional MHD" hereafter referred to as "S&G 2009" + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../mhd/magnetic_update.h" +#include "../utils/cuda_utilities.h" +#ifdef MHD +namespace mhd +{ +// ========================================================================= +__global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells, Real const dt, Real const dx, + Real const dy, Real const dz) +{ + // get a thread index + int const blockId = blockIdx.x + blockIdx.y * gridDim.x; + int const threadId = threadIdx.x + blockId * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(threadId, nx, ny, xid, yid, zid); + + // Thread guard to avoid overrun and to skip ghost cells that cannot be + // evolved due to missing electric fields that can't be reconstructed + if (xid > 0 and yid > 0 and zid > 0 and xid < nx - 1 and yid < ny - 1 and zid < nz - 1) { + // Compute the three dt/dx quantities + Real const dtodx = dt / dx; + Real const dtody = dt / dy; + Real const dtodz = dt / dz; + + // Load the various edge electric fields required. 
The '1' and '2' + // fields are not shared and the '3' fields are shared by two of the + // updates + Real electric_x_1 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_x * n_cells]; + Real electric_x_2 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_x * n_cells]; + Real electric_x_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid + 1, nx, ny)) + + grid_enum::ct_elec_x * n_cells]; + Real electric_y_1 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + grid_enum::ct_elec_y * n_cells]; + Real electric_y_2 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid, zid + 1, nx, ny)) + grid_enum::ct_elec_y * n_cells]; + Real electric_y_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid + 1, nx, ny)) + + grid_enum::ct_elec_y * n_cells]; + Real electric_z_1 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells]; + Real electric_z_2 = + ctElectricFields[(cuda_utilities::compute1DIndex(xid, yid + 1, zid, nx, ny)) + grid_enum::ct_elec_z * n_cells]; + Real electric_z_3 = ctElectricFields[(cuda_utilities::compute1DIndex(xid + 1, yid + 1, zid, nx, ny)) + + grid_enum::ct_elec_z * n_cells]; + + // Perform Updates + + // X field update + // S&G 2009 equation 10 + destinationGrid[threadId + grid_enum::magnetic_x * n_cells] = + sourceGrid[threadId + grid_enum::magnetic_x * n_cells] + dtodz * (electric_y_3 - electric_y_1) + + dtody * (electric_z_1 - electric_z_3); + + // Y field update + // S&G 2009 equation 11 + destinationGrid[threadId + grid_enum::magnetic_y * n_cells] = + sourceGrid[threadId + grid_enum::magnetic_y * n_cells] + dtodx * (electric_z_3 - electric_z_2) + + dtodz * (electric_x_1 - electric_x_3); + + // Z field update + // S&G 2009 equation 12 + destinationGrid[threadId + grid_enum::magnetic_z * n_cells] = + sourceGrid[threadId + 
grid_enum::magnetic_z * n_cells] + dtody * (electric_x_3 - electric_x_2) + + dtodx * (electric_y_2 - electric_y_3); + } +} +// ========================================================================= +} // end namespace mhd +#endif // MHD diff --git a/src/mhd/magnetic_update.h b/src/mhd/magnetic_update.h new file mode 100644 index 000000000..2601abdb7 --- /dev/null +++ b/src/mhd/magnetic_update.h @@ -0,0 +1,51 @@ +/*! + * \file magnetic_update.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the declaration of the kernel to update the magnetic field. + * Method from Stone & Gardiner 2009 "A simple unsplit Godunov method for + * multidimensional MHD" hereafter referred to as "S&G 2009" + * + */ + +#pragma once + +// STL Includes + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ +// ========================================================================= +/*! + * \brief Update the magnetic field using the CT electric fields + * + * \param[in] sourceGrid The array which holds the old values of the + * magnetic field + * \param[out] destinationGrid The array to hold the updated values of the + * magnetic field + * \param[in] ctElectricFields The array of constrained transport electric + * fields + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[in] nz The number of cells in the z-direction + * \param[in] n_cells The total number of cells + * \param[in] dt The time step. 
If doing the half time step update make sure + * to divide it by two when passing the time step to this kernel + * \param[in] dx The size of each cell in the x-direction + * \param[in] dy The size of each cell in the y-direction + * \param[in] dz The size of each cell in the z-direction + */ +__global__ void Update_Magnetic_Field_3D(Real *sourceGrid, Real *destinationGrid, Real *ctElectricFields, int const nx, + int const ny, int const nz, int const n_cells, Real const dt, Real const dx, + Real const dy, Real const dz); +// ========================================================================= +} // end namespace mhd \ No newline at end of file diff --git a/src/mhd/magnetic_update_tests.cu b/src/mhd/magnetic_update_tests.cu new file mode 100644 index 000000000..7cfb8757c --- /dev/null +++ b/src/mhd/magnetic_update_tests.cu @@ -0,0 +1,137 @@ +/*! + * \file magnetic_update_tests.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the magnetic update code + * + */ + +// STL Includes +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../mhd/magnetic_update.h" +#include "../utils/cuda_utilities.h" +#include "../utils/testing_utilities.h" + +#ifdef MHD +// ============================================================================= +/*! + * \brief Test fixture for tMHDUpdateMagneticField3D test suite + * + */ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDUpdateMagneticField3D : public ::testing::Test +{ + public: + /*! 
+ * \brief Initialize and allocate all the various required variables and + * arrays + * + */ + tMHDUpdateMagneticField3D() + : n_cells(nx * ny * nz), + sourceGrid(n_cells * (grid_enum::num_fields)), + destinationGrid(n_cells * (grid_enum::num_fields), -999.), + ctElectricFields(n_cells * 3), + fiducialData(n_cells * (grid_enum::num_fields), -999.), + dimGrid((n_cells + TPB - 1) / TPB, 1, 1), + dimBlock(TPB, 1, 1) + { + // Allocate device arrays + GPU_Error_Check(cudaMalloc(&dev_sourceGrid, sourceGrid.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_destinationGrid, destinationGrid.size() * sizeof(double))); + GPU_Error_Check(cudaMalloc(&dev_ctElectricFields, ctElectricFields.size() * sizeof(double))); + + // Populate the grids with values where vector.at(i) = double(i). The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(sourceGrid), std::end(sourceGrid), 0.); + std::iota(std::begin(ctElectricFields), std::end(ctElectricFields), sourceGrid.back() + 1); + } + /*! + * \brief Release the device arrays allocated in the constructor + * + */ + ~tMHDUpdateMagneticField3D() + { + // dev_fiducialData is not allocated by this fixture, so it is not freed + cudaFree(dev_sourceGrid); + cudaFree(dev_destinationGrid); + cudaFree(dev_ctElectricFields); + } + + protected: + // Initialize the test grid and other state variables + size_t const nx = 3, ny = nx, nz = nx; + size_t const n_cells; + Real const dt = 3.2, dx = 2.5, dy = dx, dz = dx; + + // Launch Parameters + dim3 const dimGrid; // How many blocks in the grid + dim3 const dimBlock; // How many threads per block + + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector sourceGrid; + std::vector destinationGrid; + std::vector ctElectricFields; + std::vector fiducialData; + + // device pointers + double *dev_sourceGrid, *dev_destinationGrid, *dev_ctElectricFields, *dev_fiducialData; + + /*! 
+ * \brief Launch the kernel and check results + * + */ + void Run_Test() + { + // Copy values to GPU + GPU_Error_Check( + cudaMemcpy(dev_sourceGrid, sourceGrid.data(), sourceGrid.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_destinationGrid, destinationGrid.data(), destinationGrid.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(dev_ctElectricFields, ctElectricFields.data(), ctElectricFields.size() * sizeof(Real), + cudaMemcpyHostToDevice)); + + // Call the kernel to test + hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, dimGrid, dimBlock, 0, 0, dev_sourceGrid, dev_destinationGrid, + dev_ctElectricFields, nx, ny, nz, n_cells, dt, dx, dy, dz); + GPU_Error_Check(); + + // Copy test data back + GPU_Error_Check(cudaMemcpy(destinationGrid.data(), dev_destinationGrid, destinationGrid.size() * sizeof(Real), + cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + // Check the results + for (size_t i = 0; i < fiducialData.size(); i++) { + int xid, yid, zid; + cuda_utilities::compute3DIndices(i, nx, ny, xid, yid, zid); + testing_utilities::Check_Results(fiducialData.at(i), destinationGrid.at(i), + "value at i = " + std::to_string(i) + ", xid = " + std::to_string(xid) + + ", yid = " + std::to_string(yid) + ", zid = " + std::to_string(zid)); + } + } +}; +// ============================================================================= + +// ============================================================================= +TEST_F(tMHDUpdateMagneticField3D, CorrectInputExpectCorrectOutput) +{ + // Fiducial values + fiducialData.at(148) = 155.68000000000001; + fiducialData.at(175) = 164.75999999999999; + fiducialData.at(202) = 204.56; + + // Launch kernel and check results + Run_Test(); +} +// ============================================================================= +#endif // MHD diff --git a/src/model/disk_ICs.cpp b/src/model/disk_ICs.cpp index f6ada7002..055827001 100644 --- a/src/model/disk_ICs.cpp +++ 
b/src/model/disk_ICs.cpp @@ -1,300 +1,296 @@ /*! \file disk_ICs.cpp * \brief Definitions of initial conditions for hydrostatic disks. - Note that the grid is mapped to 1D as i + (x_dim)*j + (x_dim*y_dim)*k. */ + Note that the grid is mapped to 1D as i + (x_dim)*j + + (x_dim*y_dim)*k. */ -#include -#include #include -#include +#include +#include #include #include + +#include + #include "../global/global.h" #include "../grid/grid3D.h" -#include "../mpi/mpi_routines.h" #include "../io/io.h" +#include "../mpi/mpi_routines.h" #include "../utils/error_handling.h" - +#include "disk_galaxy.h" // #define DISK_ICS // function with logarithms used in NFW definitions -Real log_func(Real y) -{ - return log(1+y) - y/(1+y); -} +Real Log_Func(Real y) { return log(1 + y) - y / (1 + y); } -//vertical acceleration in NFW halo -Real gz_halo_D3D(Real R, Real z, Real *hdp) +// vertical acceleration in NFW halo +Real Gz_Halo_D3D(Real R, Real z, Real *hdp) { - Real M_h = hdp[2]; //halo mass - Real R_h = hdp[5]; //halo scale length - Real c_vir = hdp[4]; //halo concentration parameter - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; - Real z_comp = z/r; - - Real A = log_func(x); - Real B = 1.0 / (r*r); - Real C = GN*M_h/log_func(c_vir); - - //checked with wolfram alpha - return -C*A*B*z_comp; + Real M_h = hdp[2]; // halo mass + Real R_h = hdp[5]; // halo scale length + Real c_vir = hdp[4]; // halo concentration parameter + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real z_comp = z / r; + + Real A = Log_Func(x); + Real B = 1.0 / (r * r); + Real C = GN * M_h / Log_Func(c_vir); + + // checked with wolfram alpha + return -C * A * B * z_comp; } - -//radial acceleration in NFW halo -Real gr_halo_D3D(Real R, Real z, Real *hdp) +// radial acceleration in NFW halo +Real Gr_Halo_D3D(Real R, Real z, Real *hdp) { - Real M_h = hdp[2]; //halo mass - Real R_h = hdp[5]; //halo scale length - Real c_vir = hdp[4]; //halo concentration parameter - Real r = 
sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; - Real r_comp = R/r; - - Real A = log_func(x); - Real B = 1.0 / (r*r); - Real C = GN*M_h/log_func(c_vir); - - //checked with wolfram alpha - return -C*A*B*r_comp; + Real M_h = hdp[2]; // halo mass + Real R_h = hdp[5]; // halo scale length + Real c_vir = hdp[4]; // halo concentration parameter + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real r_comp = R / r; + + Real A = Log_Func(x); + Real B = 1.0 / (r * r); + Real C = GN * M_h / Log_Func(c_vir); + + // checked with wolfram alpha + return -C * A * B * r_comp; } -//disk radial surface density profile -Real Sigma_disk_D3D(Real r, Real *hdp) +// disk radial surface density profile +Real Sigma_Disk_D3D(Real r, Real *hdp) { - //return the exponential surface density + // return the exponential surface density Real Sigma_0 = hdp[9]; Real R_g = hdp[10]; - Real R_c = 4.5; + Real R_c = 4.5; Real Sigma; Real delta = 0.1; - Real norm = log(1.0/3.0); - Sigma = Sigma_0 * exp(-r/R_g); + Real norm = log(1.0 / 3.0); + Sigma = Sigma_0 * exp(-r / R_g); // taper the edge of the disk to 0 if (r < R_c) { - Sigma *= 2.0 - 1.0 / (1.0 - exp((r - (4.5 - delta*norm))/delta)); - } - else { - Sigma *= 1.0 / (1.0 - exp(((4.5 + delta*norm) - r)/delta)) - 1.0; + Sigma *= 2.0 - 1.0 / (1.0 - exp((r - (4.5 - delta * norm)) / delta)); + } else { + Sigma *= 1.0 / (1.0 - exp(((4.5 + delta * norm) - r) / delta)) - 1.0; } return Sigma; } -//vertical acceleration in miyamoto nagai -Real gz_disk_D3D(Real R, Real z, Real *hdp) +// vertical acceleration in miyamoto nagai +Real Gz_Disk_D3D(Real R, Real z, Real *hdp) { - Real M_d = hdp[1]; //disk mass - Real R_d = hdp[6]; //MN disk length - Real Z_d = hdp[7]; //MN disk height - Real a = R_d; - Real b = Z_d; - Real A = sqrt(b*b + z*z); - Real B = a + A; - Real C = pow(B*B + R*R, 1.5); - - //checked with wolfram alpha - return -GN*M_d*z*B/(A*C); + Real M_d = hdp[1]; // disk mass + Real R_d = hdp[6]; // MN disk length + Real Z_d 
= hdp[7]; // MN disk height + Real a = R_d; + Real b = Z_d; + Real A = sqrt(b * b + z * z); + Real B = a + A; + Real C = pow(B * B + R * R, 1.5); + + // checked with wolfram alpha + return -GN * M_d * z * B / (A * C); } -//radial acceleration in miyamoto nagai -Real gr_disk_D3D(Real R, Real z, Real *hdp) +// radial acceleration in miyamoto nagai +Real Gr_Disk_D3D(Real R, Real z, Real *hdp) { - Real M_d = hdp[1]; //disk mass - Real R_d = hdp[6]; //MN disk length - Real Z_d = hdp[7]; //MN disk height - Real A = sqrt(Z_d*Z_d + z*z); - Real B = R_d + A; - Real C = pow(B*B + R*R, 1.5); - - //checked with wolfram alpha - return -GN*M_d*R/C; + Real M_d = hdp[1]; // disk mass + Real R_d = hdp[6]; // MN disk length + Real Z_d = hdp[7]; // MN disk height + Real A = sqrt(Z_d * Z_d + z * z); + Real B = R_d + A; + Real C = pow(B * B + R * R, 1.5); + + // checked with wolfram alpha + return -GN * M_d * R / C; } - -//NFW halo potential -Real phi_halo_D3D(Real R, Real z, Real *hdp) +// NFW halo potential +Real Phi_Halo_D3D(Real R, Real z, Real *hdp) { - Real M_h = hdp[2]; //halo mass - Real R_h = hdp[5]; //halo scale length - Real c_vir = hdp[4]; //halo concentration parameter - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; + Real M_h = hdp[2]; // halo mass + Real R_h = hdp[5]; // halo scale length + Real c_vir = hdp[4]; // halo concentration parameter + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; - Real C = GN*M_h/(R_h*log_func(c_vir)); + Real C = GN * M_h / (R_h * Log_Func(c_vir)); - //limit x to non-zero value - if(x<1.0e-9) + // limit x to non-zero value + if (x < 1.0e-9) { x = 1.0e-9; + } - //checked with wolfram alpha - return -C*log(1+x)/x; + // checked with wolfram alpha + return -C * log(1 + x) / x; } -//Miyamoto-Nagai potential -Real phi_disk_D3D(Real R, Real z, Real *hdp) +// Miyamoto-Nagai potential +Real Phi_Disk_D3D(Real R, Real z, Real *hdp) { - Real M_d = hdp[1]; //disk mass - Real R_d = hdp[6]; //MN disk length - Real 
Z_d = hdp[7]; //MN disk height - Real A = sqrt(z*z + Z_d*Z_d); - Real B = R_d + A; - Real C = sqrt(R*R + B*B); - - //patel et al. 2017, eqn 2 - return -GN*M_d/C; + Real M_d = hdp[1]; // disk mass + Real R_d = hdp[6]; // MN disk length + Real Z_d = hdp[7]; // MN disk height + Real A = sqrt(z * z + Z_d * Z_d); + Real B = R_d + A; + Real C = sqrt(R * R + B * B); + + // patel et al. 2017, eqn 2 + return -GN * M_d / C; } -//total potential -Real phi_total_D3D(Real R, Real z, Real *hdp) +// total potential +Real Phi_Total_D3D(Real R, Real z, Real *hdp) { - Real Phi_A = phi_halo_D3D(R,z,hdp); - Real Phi_B = phi_disk_D3D(R,z,hdp); + Real Phi_A = Phi_Halo_D3D(R, z, hdp); + Real Phi_B = Phi_Disk_D3D(R, z, hdp); return Phi_A + Phi_B; } -Real phi_hot_halo_D3D(Real r, Real *hdp) +Real Phi_Hot_Halo_D3D(Real r, Real *hdp) { - Real Phi_A = phi_halo_D3D(0,r,hdp); - Real Phi_B = phi_disk_D3D(0,r,hdp); - //return Phi_A; + Real Phi_A = Phi_Halo_D3D(0, r, hdp); + Real Phi_B = Phi_Disk_D3D(0, r, hdp); + // return Phi_A; return Phi_A + Phi_B; } - -//returns the cell-centered vertical -//location of the cell with index k -//k is indexed at 0 at the lowest ghost cell -Real z_hc_D3D(int k, Real dz, int nz, int ng) +// returns the cell-centered vertical +// location of the cell with index k +// k is indexed at 0 at the lowest ghost cell +Real Z_Hc_D3D(int k, Real dz, int nz, int ng) { - //checked that this works, such that the - //if dz = L_z/nz for the real domain, then the z positions - //are set correctly for cell centers with nz spanning - //the real domain, and nz + 2*ng spanning the real + ghost domains - if(!(nz%2)) - { - //even # of cells - return 0.5*dz + ((Real) (k-ng-nz/2))*dz; - }else{ - //odd # of cells - return ((Real) (k-ng-(nz-1)/2))*dz; + // checked that this works, such that the + // if dz = L_z/nz for the real domain, then the z positions + // are set correctly for cell centers with nz spanning + // the real domain, and nz + 2*ng spanning the real + ghost domains + if 
(!(nz % 2)) { + // even # of cells + return 0.5 * dz + ((Real)(k - ng - (int)(nz / 2))) * dz; + } else { + // odd # of cells + return ((Real)(k - ng - (int)((nz - 1) / 2))) * dz; } } -//returns the cell-centered radial -//location of the cell with index i -Real r_hc_D3D(int i, Real dr) +// returns the cell-centered radial +// location of the cell with index i +Real R_Hc_D3D(int i, Real dr) { - //the zeroth cell is centered at 0.5*dr - return 0.5*dr + ((Real) i)*dr; + // the zeroth cell is centered at 0.5*dr + return 0.5 * dr + ((Real)i) * dr; } - - -/*! \fn void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) - * \brief Calculate the density at spherical radius r due to a hydrostatic halo. Uses an analytic - expression normalized by the value of the potential at the cooling radius. */ -void hydrostatic_ray_analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) +/*! \fn void Hydrostatic_Ray_Analytical_D3D(Real *rho, Real *r, Real *hdp, Real + dr, int nr) + * \brief Calculate the density at spherical radius r due to a hydrostatic + halo. Uses an analytic expression normalized by the value of the potential at + the cooling radius. 
*/ +void Hydrostatic_Ray_Analytical_D3D(Real *rho, Real *r, Real *hdp, Real dr, int nr) { - //Routine to determine the hydrostatic density profile - //along a ray from the galaxy center - int i; //index along r direction + // Routine to determine the hydrostatic density profile + // along a ray from the galaxy center + int i; // index along r direction - Real gamma = hdp[13]; //adiabatic index - Real rho_eos = hdp[18]; //density where K_EOS is set - Real cs = hdp[19]; //sound speed at rho_eos - Real r_cool = hdp[20]; //cooling radius + Real gamma = hdp[13]; // adiabatic index + Real rho_eos = hdp[18]; // density where K_EOS is set + Real cs = hdp[19]; // sound speed at rho_eos + Real r_cool = hdp[20]; // cooling radius - Real Phi_0; //potential at cooling radius + Real Phi_0; // potential at cooling radius - Real D_rho; //ratio of density at mid plane and rho_eos + Real D_rho; // ratio of density at mid plane and rho_eos - Real gmo = gamma - 1.0; //gamma-1 + Real gmo = gamma - 1.0; // gamma-1 - //compute the potential at the cooling radius - Phi_0 = phi_hot_halo_D3D(r_cool,hdp); + // compute the potential at the cooling radius + Phi_0 = Phi_Hot_Halo_D3D(r_cool, hdp); - //We are normalizing to the central density - //so D_rho == 1 + // We are normalizing to the central density + // so D_rho == 1 D_rho = 1.0; - //store densities - for(i=0;i0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e Phi_0 %E cs %e\n",R,D_rho,z_1,phi_total_D3D(R,z_1,hdp),Phi_0,cs); + // perform a simple check about the fraction of density within + // a single cell + z_1 = Z_Hc_D3D(ks, dz, nz, ng) + 0.5 * dz; // cell ceiling + D_rho = (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + if (exp(-1 * D_rho) < 0.1) { + printf( + "WARNING: >0.9 density in single cell R %e D_rho %e z_1 %e Phi(z) %e " + "Phi_0 %E cs %e\n", + R, D_rho, z_1, Phi_Total_D3D(R, z_1, hdp), Phi_0, cs); + } - //let's find the cell above the disk where the - //density falls by exp(-7) < 1.0e-3. 
- for(k=ks;k=7.0) + // let's find the cell above the disk where the + // density falls by exp(-7) < 1.0e-3. + for (k = ks; k < nzt; k++) { + z_1 = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz; // cell ceiling + D_rho = (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + if (D_rho >= 7.0) { break; + } } - //if(R<1.0) - // printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R = %e\n",k-ks,z_1,exp(-1*D_rho),R); + // if(R<1.0) + // printf("Cells above disk (k-ks) = %d, z_1 = %e, exp(-D) = %e, R = + // %e\n",k-ks,z_1,exp(-1*D_rho),R); - //now we can compute the unnormalized integral of the density + // now we can compute the unnormalized integral of the density z_disk_max = z_1; - //Compute surface density - z_int_min = 0.0; //kpc - z_int_max = z_1; //kpc - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); - phi_int = 0.0; - for(k=0;kz_disk_max) + n_int = 10; // integrate over a 1/10 cell + for (k = ks; k < nzt; k++) { + // find cell center, bottom, and top + z_int_min = Z_Hc_D3D(k, dz, nz, ng) - 0.5 * dz; + z_int_max = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz; + if (z_int_max > z_disk_max) { z_int_max = z_disk_max; - if(!flag) - { - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); + } + if (!flag) { + dz_int = (z_int_max - z_int_min) / ((Real)(n_int)); phi_int = 0.0; - for(i=0;i 10.) - z_2 = 10.*z_1; - //advance limit + z_0 = 1.0e-3; + z_1 = 1.0e-2; + while (!flag_phi) { + A_0 = D_rho - (Phi_Total_D3D(R, z_0, hdp) - Phi_0) / (cs * cs); + A_1 = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); + z_2 = z_1 - A_1 * (z_1 - z_0) / (A_1 - A_0); + if (fabs(z_2 - z_1) / fabs(z_1) > 10.) { + z_2 = 10. 
* z_1; + } + // advance limit z_0 = z_1; z_1 = z_2; - if(fabs(z_1-z_0)1000) - { + if (iter_phi > 1000) { printf("Something wrong in determining central density...\n"); - printf("iter_phi = %d\n",iter_phi); - printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n",z_0,z_1,z_2,A_0,A_1,phi_total_D3D(R,z_0,hdp),phi_total_D3D(R,z_1,hdp)); - #ifdef MPI_CHOLLA + printf("iter_phi = %d\n", iter_phi); + printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1, + Phi_Total_D3D(R, z_0, hdp), Phi_Total_D3D(R, z_1, hdp)); +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif +#endif exit(0); } } - A_1 = D_rho - (phi_total_D3D(R,z_1,hdp)-Phi_0)/(cs*cs); + A_1 = D_rho - (Phi_Total_D3D(R, z_1, hdp) - Phi_0) / (cs * cs); z_disk_max = z_1; - //Compute surface density - z_int_min = 0.0; //kpc - z_int_max = z_1; //kpc - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); - phi_int = 0.0; - for(k=0;k100) - { + if (iter > 100) { printf("About to exit...\n"); - #ifdef MPI_CHOLLA +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif +#endif exit(0); } } - //OK, at this stage we know how to set the densities - //so let's take cell averages + // OK, at this stage we know how to set the densities + // so let's take cell averages flag = 0; - n_int = 10; // integrate over a 1/10 cell - for(k=ks;kz_disk_max) + n_int = 10; // integrate over a 1/10 cell + for (k = ks; k < nzt; k++) { + // find cell center, bottom, and top + z_int_min = Z_Hc_D3D(k, dz, nz, ng) - 0.5 * dz; + z_int_max = Z_Hc_D3D(k, dz, nz, ng) + 0.5 * dz; + if (z_int_max > z_disk_max) { z_int_max = z_disk_max; - if(!flag) - { - dz_int = (z_int_max-z_int_min)/((Real) (n_int)); + } + if (!flag) { + dz_int = (z_int_max - z_int_min) / ((Real)(n_int)); phi_int = 0.0; - for(i=0;i 10.) - z_2 = 10.*z_1; + if (fabs(z_2 - z_1) / fabs(z_1) > 10.) { + z_2 = 10. 
* z_1; + } - //advance limit + // advance limit z_0 = z_1; z_1 = z_2; - //printf("z_0 %e z_1 %e\n",z_0,z_1); - if(fabs(z_1-z_0)1000) - { + if (iter_phi > 1000) { printf("Something wrong in determining central density...\n"); - printf("iter_phi = %d\n",iter_phi); - printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n",z_0,z_1,z_2,A_0,A_1,phi_total_D3D(0,z_0,hdp),phi_total_D3D(0,z_1,hdp)); - #ifdef MPI_CHOLLA + printf("iter_phi = %d\n", iter_phi); + printf("z_0 %e z_1 %e z_2 %e A_0 %e A_1 %e phi_0 %e phi_1 %e\n", z_0, z_1, z_2, A_0, A_1, + Phi_Total_D3D(0, z_0, hdp), Phi_Total_D3D(0, z_1, hdp)); +#ifdef MPI_CHOLLA MPI_Finalize(); - #endif +#endif exit(0); } } - //generate a high resolution density and z profile - int n_int = 1000; - Real z_int_min = 0.0; //kpc - Real z_int_max = z_1; //kpc - Real dz_int = (z_int_max-z_int_min)/((Real) (n_int)); - Real phi_int = 0.0; - - //now integrate the density profile - for(k=0;k=nr-1) - { - if(i<0) - { + // find the index of the current + // position in r_halo (based on r_hc_D3D) + i = (int)((r - 0.5 * dr) / dr); + if (i < 0 || i >= nr - 1) { + if (i < 0) { i = 0; - }else{ - i = nr-2; + } else { + i = nr - 2; } } // return the interpolated density profile - return (rho_halo[i+1] - rho_halo[i])*(r - r_halo[i])/(r_halo[i+1]-r_halo[i]) + rho_halo[i]; + return (rho_halo[i + 1] - rho_halo[i]) * (r - r_halo[i]) / (r_halo[i + 1] - r_halo[i]) + rho_halo[i]; } - - - - -/*! \fn void Disk_3D(parameters P) +/*! \fn void Disk_3D(Parameters P ) * \brief Initialize the grid with a 3D disk. 
*/ -void Grid3D::Disk_3D(parameters p) +void Grid3D::Disk_3D(Parameters p) { - - #ifdef DISK_ICS +#ifdef DISK_ICS int i, j, k, id; Real x_pos, y_pos, z_pos, r, phi; @@ -771,71 +748,66 @@ void Grid3D::Disk_3D(parameters p) Real r_cool; // MW model - M_vir = 1.0e12; // viral mass of MW in M_sun - M_d = 6.5e10; // mass of disk in M_sun (assume all stars) - R_d = 3.5; // MW stellar disk scale length in kpc - z_d = 3.5/5.0; // MW stellar disk scale height in kpc - R_vir = 261; // MW viral radius in kpc - c_vir = 20; // MW halo concentration (to account for adiabatic contraction) - r_cool = 157.0; // cooling radius in kpc (MW) - - // M82 model - //M_vir = 5.0e10; // viral mass of M82 in M_sun (guess) - //M_d = 1.0e10; // mass of M82 disk in M_sun (Greco 2012) - //R_d = 0.8; // M82 stellar disk scale length in kpc (Mayya 2009) - //z_d = 0.15; // M82 stellar thin disk scale height in kpc (Lim 2013) - //R_vir = R_d/0.015; // M82 viral radius in kpc from R_(1/2) = 0.015 R_200 (Kravtsov 2013) - //c_vir = 10; // M82 halo concentration - //r_cool = 100.0; // cooling in kpc (M82, guess) - - M_h = M_vir - M_d; // halo mass in M_sun - R_s = R_vir / c_vir; // halo scale length in kpc - //T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 - //T_d = 2.0e5; - T_d = 1.0e4; // CHANGED FOR ISOTHERMAL - T_h = 1.0e6; // halo temperature, at density floor - rho_eos = 1.0e7; //gas eos normalized at 1e7 Msun/kpc^3 - rho_eos_h = 3.0e3; //gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5) - mu = 0.6; - - R_g = 2.0*R_d; //gas scale length in kpc - Sigma_0 = 0.25*M_d/(2*M_PI*R_g*R_g); //central surface density in Msun/kpc^2 - H_g = z_d; //initial guess for gas scale height - //rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3 - - //EOS info - cs = sqrt(KB*T_d/(mu*MP))*TIME_UNIT/LENGTH_UNIT; //sound speed in kpc/kyr - cs_h = sqrt(KB*T_h/(mu*MP))*TIME_UNIT/LENGTH_UNIT; //sound speed in kpc/kyr - - //set some initial parameters - int nhdp = 21; //number of 
parameters to pass hydrostatic column - Real *hdp = (Real *) calloc(nhdp,sizeof(Real)); //parameters - hdp[0] = M_vir; - hdp[1] = M_d; - hdp[2] = M_h; - hdp[3] = R_vir; - hdp[4] = c_vir; - hdp[5] = R_s; - hdp[6] = R_d; - hdp[7] = z_d; - hdp[8] = T_d; - hdp[9] = Sigma_0; - hdp[10] = R_g; - hdp[11] = H_g; - hdp[13] = p.gamma; - - //determine rho_eos by setting central density of disk - //based on central temperature - rho_eos = determine_rho_eos_D3D(cs, Sigma_0, hdp); - - //set EOS parameters - //K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma - K_eos = cs*cs*rho_eos; // CHANGED FOR ISOTHERMAL - K_eos_h = cs_h*cs_h*pow(rho_eos_h,1.0-p.gamma)/p.gamma; - - //Store remaining parameters + DiskGalaxy galaxy = galaxies::MW; // NOLINT(cppcoreguidelines-slicing) + // M82 model galaxies::M82; + + M_vir = galaxy.getM_vir(); // viral mass in M_sun + M_d = galaxy.getM_d(); // mass of disk in M_sun (assume all stars) + R_d = galaxy.getR_d(); // stellar disk scale length in kpc + z_d = galaxy.getZ_d(); // stellar disk scale height in kpc + R_vir = galaxy.getR_vir(); // viral radius in kpc + c_vir = galaxy.getC_vir(); // halo concentration (to account for adiabatic + // contraction) + r_cool = galaxy.getR_cool(); // cooling radius in kpc (MW) + + M_h = M_vir - M_d; // halo mass in M_sun + R_s = R_vir / c_vir; // halo scale length in kpc + // T_d = 5.9406e5; // SET TO MATCH K_EOS SET BY HAND for K_eos = 1.859984e-14 + // T_d = 2.0e5; + T_d = 1.0e4; // CHANGED FOR ISOTHERMAL + T_h = 1.0e6; // halo temperature, at density floor + rho_eos = 1.0e7; // gas eos normalized at 1e7 Msun/kpc^3 + rho_eos_h = 3.0e3; // gas eos normalized at 3e3 Msun/kpc^3 (about n_h = 10^-3.5) + mu = 0.6; + + R_g = 2.0 * R_d; // gas scale length in kpc + Sigma_0 = 0.25 * M_d / (2 * M_PI * R_g * R_g); // central surface density in Msun/kpc^2 + H_g = z_d; // initial guess for gas scale height + // rho_floor = 1.0e3; //ICs minimum density in Msun/kpc^3 + + // EOS info + cs = sqrt(KB * T_d / (mu * 
MP)) * TIME_UNIT / LENGTH_UNIT; // sound speed in kpc/kyr + cs_h = sqrt(KB * T_h / (mu * MP)) * TIME_UNIT / LENGTH_UNIT; // sound speed in kpc/kyr + + // set some initial parameters + int nhdp = 21; // number of parameters to pass hydrostatic column + Real *hdp = (Real *)calloc(nhdp, sizeof(Real)); // parameters + hdp[0] = M_vir; + hdp[1] = M_d; + hdp[2] = M_h; + hdp[3] = R_vir; + hdp[4] = c_vir; + hdp[5] = R_s; + hdp[6] = R_d; + hdp[7] = z_d; + hdp[8] = T_d; + hdp[9] = Sigma_0; + hdp[10] = R_g; + hdp[11] = H_g; + hdp[13] = p.gamma; + + // determine rho_eos by setting central density of disk + // based on central temperature + rho_eos = Determine_Rho_EOS_D3D(cs, Sigma_0, hdp); + + // set EOS parameters + // K_eos = cs*cs*pow(rho_eos,1.0-p.gamma)/p.gamma; //P = K\rho^gamma + K_eos = cs * cs * rho_eos; // CHANGED FOR ISOTHERMAL + K_eos_h = cs_h * cs_h * pow(rho_eos_h, 1.0 - p.gamma) / p.gamma; + + // Store remaining parameters hdp[12] = K_eos; - hdp[14] = 0.0; //rho_floor, set to 0 + hdp[14] = 0.0; // rho_floor, set to 0 hdp[15] = rho_eos; hdp[16] = cs; hdp[17] = K_eos_h; @@ -843,32 +815,28 @@ void Grid3D::Disk_3D(parameters p) hdp[19] = cs_h; hdp[20] = r_cool; - - //Now we can start the density calculation - //we will loop over each column and compute - //the density distribution - int nz = p.nz; - int nzt = 2*H.n_ghost + nz; - Real dz = p.zlen / ((Real) nz); - Real *rho = (Real *) calloc(nzt,sizeof(Real)); - + // Now we can start the density calculation + // we will loop over each column and compute + // the density distribution + int nz = p.nz; + int nzt = 2 * H.n_ghost + nz; + Real dz = p.zlen / ((Real)nz); + Real *rho = (Real *)calloc(nzt, sizeof(Real)); // create a look up table for the halo gas profile - int nr = 1000; - Real dr = sqrt(3)*0.5*fmax(p.xlen, p.zlen) / ((Real) nr); - Real *rho_halo = (Real *) calloc(nr,sizeof(Real)); - Real *r_halo = (Real *) calloc(nr,sizeof(Real)); - + int nr = 1000; + Real dr = sqrt(3) * 0.5 * fmax(p.xlen, p.zlen) / ((Real)nr); 
+ Real *rho_halo = (Real *)calloc(nr, sizeof(Real)); + Real *r_halo = (Real *)calloc(nr, sizeof(Real)); ////////////////////////////////////////////// ////////////////////////////////////////////// // Produce a look up table for a hydrostatic hot halo ////////////////////////////////////////////// ////////////////////////////////////////////// - hydrostatic_ray_analytical_D3D(rho_halo, r_halo, hdp, dr, nr); + Hydrostatic_Ray_Analytical_D3D(rho_halo, r_halo, hdp, dr, nr); chprintf("Hot halo lookup table generated...\n"); - ////////////////////////////////////////////// ////////////////////////////////////////////// // Add a disk component @@ -878,45 +846,44 @@ void Grid3D::Disk_3D(parameters p) // hydrostatic column for the disk // and add the disk density and thermal energy // to the density and energy arrays - for (j=H.n_ghost; j0.0) - { + // restrict to regions where the density + // has been set + if (d > 0.0) { // get the centered x, y, and z positions Get_Position(i, j, k, &x_pos, &y_pos, &z_pos); - // calculate radial position and phi (assumes disk is centered at 0, 0) - r = sqrt(x_pos*x_pos + y_pos*y_pos); - phi = atan2(y_pos, x_pos); // azimuthal angle (in x-y plane) + // calculate radial position and phi (assumes disk is centered at 0, + // 0) + r = sqrt(x_pos * x_pos + y_pos * y_pos); + phi = atan2(y_pos, x_pos); // azimuthal angle (in x-y plane) // radial acceleration from disk - a_d = fabs(gr_disk_D3D(r, z_pos, hdp)); + a_d = fabs(Gr_Disk_D3D(r, z_pos, hdp)); // radial acceleration from halo - a_h = fabs(gr_halo_D3D(r, z_pos, hdp)); + a_h = fabs(Gr_Halo_D3D(r, z_pos, hdp)); // pressure gradient along x direction // gradient calc is first order at boundaries - if (i == H.n_ghost) idm = i + j*H.nx + k*H.nx*H.ny; - else idm = (i-1) + j*H.nx + k*H.nx*H.ny; - if (i == H.nx-H.n_ghost-1) idp = i + j*H.nx + k*H.nx*H.ny; - else idp = (i+1) + j*H.nx + k*H.nx*H.ny; - Get_Position(i-1, j, k, &xpm, &ypm, &zpm); - Get_Position(i+1, j, k, &xpp, &ypp, &zpp); - Pm = 
C.Energy[idm]*(gama-1.0); // only internal energy stored in energy currently - Pp = C.Energy[idp]*(gama-1.0); // only internal energy stored in energy currently - dPdx = (Pp-Pm)/(xpp-xpm); - - //pressure gradient along y direction - if (j == H.n_ghost) idm = i + j*H.nx + k*H.nx*H.ny; - else idm = i + (j-1)*H.nx + k*H.nx*H.ny; - if (j == H.ny-H.n_ghost-1) idp = i + j*H.nx + k*H.nx*H.ny; - else idp = i + (j+1)*H.nx + k*H.nx*H.ny; - Get_Position(i, j-1, k, &xpm, &ypm, &zpm); - Get_Position(i, j+1, k, &xpp, &ypp, &zpm); - Pm = C.Energy[idm]*(gama-1.0); // only internal energy stored in energy currently - Pp = C.Energy[idp]*(gama-1.0); // only internal energy stored in energy currently - dPdy = (Pp-Pm)/(ypp-ypm); - - //radial pressure gradient - dPdr = x_pos*dPdx/r + y_pos*dPdy/r; - - //radial acceleration - a = a_d + a_h + dPdr/d; - - if(isnan(a)||(a!=a)||(r*a<0)) - { - //printf("i %d j %d k %d a %e a_d %e dPdr %e d %e\n",i,j,k,a,a_d,dPdr,d); - //printf("i %d j %d k %d x_pos %e y_pos %e z_pos %e dPdx %e dPdy %e\n",i,j,k,x_pos,y_pos,z_pos,dPdx,dPdy); - //printf("i %d j %d k %d Pm %e Pp %e\n",i,j,k,Pm,Pp); - //printf("ypp %e ypm %e xpp %e zpm %e r %e\n",ypp,ypm, xpp, xpm ,r); - //printf("Energy pm %e pp %e density pm %e pp %e\n",C.Energy[idm],C.Energy[idp],C.density[idm],C.density[idp]); + if (i == H.n_ghost) { + idm = i + j * H.nx + k * H.nx * H.ny; + } else { + idm = (i - 1) + j * H.nx + k * H.nx * H.ny; } - else { - + if (i == H.nx - H.n_ghost - 1) { + idp = i + j * H.nx + k * H.nx * H.ny; + } else { + idp = (i + 1) + j * H.nx + k * H.nx * H.ny; + } + Get_Position(i - 1, j, k, &xpm, &ypm, &zpm); + Get_Position(i + 1, j, k, &xpp, &ypp, &zpp); + Pm = C.Energy[idm] * (gama - 1.0); // only internal energy stored in energy currently + Pp = C.Energy[idp] * (gama - 1.0); // only internal energy stored in energy currently + dPdx = (Pp - Pm) / (xpp - xpm); + + // pressure gradient along y direction + if (j == H.n_ghost) { + idm = i + j * H.nx + k * H.nx * H.ny; + } else { + 
idm = i + (j - 1) * H.nx + k * H.nx * H.ny; + } + if (j == H.ny - H.n_ghost - 1) { + idp = i + j * H.nx + k * H.nx * H.ny; + } else { + idp = i + (j + 1) * H.nx + k * H.nx * H.ny; + } + Get_Position(i, j - 1, k, &xpm, &ypm, &zpm); + Get_Position(i, j + 1, k, &xpp, &ypp, &zpm); + Pm = C.Energy[idm] * (gama - 1.0); // only internal energy stored in energy currently + Pp = C.Energy[idp] * (gama - 1.0); // only internal energy stored in energy currently + dPdy = (Pp - Pm) / (ypp - ypm); + + // radial pressure gradient + dPdr = x_pos * dPdx / r + y_pos * dPdy / r; + + // radial acceleration + a = a_d + a_h + dPdr / d; + + if (isnan(a) || (a != a) || (r * a < 0)) { + // printf("i %d j %d k %d a %e a_d %e dPdr %e d + // %e\n",i,j,k,a,a_d,dPdr,d); printf("i %d j %d k %d x_pos %e y_pos + // %e z_pos %e dPdx %e dPdy + // %e\n",i,j,k,x_pos,y_pos,z_pos,dPdx,dPdy); printf("i %d j %d k %d + // Pm %e Pp %e\n",i,j,k,Pm,Pp); printf("ypp %e ypm %e xpp %e zpm %e + // r %e\n",ypp,ypm, xpp, xpm ,r); printf("Energy pm %e pp %e density + // pm %e pp + // %e\n",C.Energy[idm],C.Energy[idp],C.density[idm],C.density[idp]); + } else { // radial velocity - v = sqrt(r*a); - vx = -sin(phi)*v; - vy = cos(phi)*v; + v = sqrt(r * a); + vx = -sin(phi) * v; + vy = cos(phi) * v; vz = 0; // set the momenta - C.momentum_x[id] = d*vx; - C.momentum_y[id] = d*vy; - C.momentum_z[id] = d*vz; - - //sheepishly check for NaN's! + C.momentum_x[id] = d * vx; + C.momentum_y[id] = d * vy; + C.momentum_z[id] = d * vz; - if((d<0)||(P<0)||(isnan(d))||(isnan(P))||(d!=d)||(P!=P)) - printf("d %e P %e i %d j %d k %d id %d\n",d,P,i,j,k,id); + // sheepishly check for NaN's! 
- if((isnan(vx))||(isnan(vy))||(isnan(vz))||(vx!=vx)||(vy!=vy)||(vz!=vz)) { - printf("vx %e vy %e vz %e i %d j %d k %d id %d\n",vx,vy,vz,i,j,k,id); + if ((d < 0) || (P < 0) || (isnan(d)) || (isnan(P)) || (d != d) || (P != P)) { + printf("d %e P %e i %d j %d k %d id %d\n", d, P, i, j, k, id); } - else { - //if the density is negative, there - //is a bigger problem! - if(d<0) - { - printf("pid %d error negative density i %d j %d k %d d %e\n",-1,i,j,k,d); + + if ((isnan(vx)) || (isnan(vy)) || (isnan(vz)) || (vx != vx) || (vy != vy) || (vz != vz)) { + printf("vx %e vy %e vz %e i %d j %d k %d id %d\n", vx, vy, vz, i, j, k, id); + } else { + // if the density is negative, there + // is a bigger problem! + if (d < 0) { + printf("pid %d error negative density i %d j %d k %d d %e\n", -1, i, j, k, d); } } } @@ -1034,31 +1011,30 @@ void Grid3D::Disk_3D(parameters p) // Add a hot, hydrostatic halo ////////////////////////////////////////////// ////////////////////////////////////////////// - for (k=H.n_ghost; k +#include +#include + #include "../global/global.h" -class DiskGalaxy { - -private: - Real M_vir, M_d, R_d, Z_d, R_vir, c_vir, r_cool, M_h, R_h; - Real log_func(Real y) { - return log(1+y) - y/(1+y); - }; - - -public: - DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) { - M_d = md; - R_d = rd; - Z_d = zd; - M_vir = mvir; - R_vir = rvir; - c_vir = cvir; - r_cool = rcool; - M_h = M_vir - M_d; - R_h = R_vir / c_vir; - }; - - - /** - * Radial acceleration in miyamoto nagai - */ - Real gr_disk_D3D(Real R, Real z) { - Real A = R_d + sqrt(Z_d*Z_d + z*z); - Real B = pow(A*A + R*R, 1.5); - - return -GN*M_d*R/B; - }; - - - /** - * Radial acceleration in NFW halo - */ - Real gr_halo_D3D(Real R, Real z){ - Real r = sqrt(R*R + z*z); //spherical radius - Real x = r / R_h; - Real r_comp = R/r; - - Real A = log_func(x); - Real B = 1.0 / (r*r); - Real C = GN*M_h/log_func(c_vir); - - return -C*A*B*r_comp; - }; - - - /** - * Convenience method that 
returns the combined radial acceleration - * of a disk galaxy at a specified point. - * @param R the cylindrical radius at the desired point - * @param z the distance perpendicular to the plane of the disk of the desired point - * @return - */ - Real gr_total_D3D(Real R, Real z) { - return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); - }; - - - /** - * Potential of NFW halo - */ - Real phi_halo_D3D(Real R, Real z) { - Real r = sqrt(R * R + z * z); //spherical radius - Real x = r / R_h; - Real C = GN * M_h / (R_h * log_func(c_vir)); - - //limit x to non-zero value - if (x < 1.0e-9) x = 1.0e-9; - - return -C * log(1 + x) / x; - }; - - - /** - * Miyamoto-Nagai potential - */ - Real phi_disk_D3D(Real R, Real z) { - Real A = sqrt(z*z + Z_d*Z_d); - Real B = R_d + A; - Real C = sqrt(R*R + B*B); - - //patel et al. 2017, eqn 2 - return -GN * M_d / C; - }; - - Real rho_disk_D3D(const Real r, const Real z) { - const Real a = R_d; - const Real c = Z_d; - const Real b = sqrt(z*z+c*c); - const Real d = a+b; - const Real s = r*r+d*d; - return M_d*c*c*(a*(d*d+r*r)+3.0*b*d*d)/(4.0*M_PI*b*b*b*pow(s,2.5)); +class DiskGalaxy +{ + private: + Real M_vir, M_d, R_d, Z_d, R_vir, c_vir, r_cool, M_h, R_h; + Real log_func(Real y) { return log(1 + y) - y / (1 + y); }; + + public: + DiskGalaxy(Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) + { + M_d = md; + R_d = rd; + Z_d = zd; + M_vir = mvir; + R_vir = rvir; + c_vir = cvir; + r_cool = rcool; + M_h = M_vir - M_d; + R_h = R_vir / c_vir; + }; + + /** + * Radial acceleration in miyamoto nagai + */ + Real gr_disk_D3D(Real R, Real z) + { + Real A = R_d + sqrt(Z_d * Z_d + z * z); + Real B = pow(A * A + R * R, 1.5); + + return -GN * M_d * R / B; + }; + + /** + * Radial acceleration in NFW halo + */ + Real gr_halo_D3D(Real R, Real z) + { + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real r_comp = R / r; + + Real A = log_func(x); + Real B = 1.0 / (r * r); + Real C = GN * M_h / log_func(c_vir); + + return 
-C * A * B * r_comp; + }; + + /** + * Convenience method that returns the combined radial acceleration + * of a disk galaxy at a specified point. + * @param R the cylindrical radius at the desired point + * @param z the distance perpendicular to the plane of the disk of the desired + * point + * @return + */ + Real gr_total_D3D(Real R, Real z) { return gr_disk_D3D(R, z) + gr_halo_D3D(R, z); }; + + /** + * Potential of NFW halo + */ + Real phi_halo_D3D(Real R, Real z) + { + Real r = sqrt(R * R + z * z); // spherical radius + Real x = r / R_h; + Real C = GN * M_h / (R_h * log_func(c_vir)); + + // limit x to non-zero value + if (x < 1.0e-9) { + x = 1.0e-9; } - /** - * Convenience method that returns the combined gravitational potential - * of the disk and halo. - */ - Real phi_total_D3D(Real R, Real z) { - return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); - }; - - - /** - * epicyclic frequency - */ - Real kappa2(Real R, Real z) { - Real r = sqrt(R*R + z*z); - Real x = r/R_h; - Real C = GN * M_h / (R_h * log_func(c_vir)); - Real A = R_d + sqrt(z*z + Z_d*Z_d); - Real B = sqrt(R*R + A*A); - - Real phiH_prime = -C*R/(r*r)/(1 + x) + C*log(1+x)*R_h*R/(r*r*r) + GN*M_d*R/(B*B*B); - Real phiH_prime_prime = -C/(r*r)/(1+x) + 2*C*R*R/(r*r*r*r)/(1+x) + C/((1+x)*(1+x))*R*R/R_h/(r*r*r) + - C*R*R/(1+x)/(r*r*r*r) + C*log(1+x)*R_h/(r*r*r)*(1 - 3*R*R/(r*r)) + - GN*M_d/(B*B*B)*(1 - 3*R*R/(B*B)); - - return 3/R*phiH_prime + phiH_prime_prime; - }; - - - Real surface_density(Real R) { - return M_d/(2*M_PI)/(R_d*R_d)*exp(-R/R_d); - }; - - Real sigma_crit(Real R) { - return 3.36*GN*surface_density(R)/sqrt(kappa2(R,0.0)); - }; - - - Real getM_d() const { return M_d; }; - Real getR_d() const { return R_d; }; - Real getZ_d() const { return Z_d; }; - + return -C * log(1 + x) / x; + }; + + /** + * Miyamoto-Nagai potential + */ + Real phi_disk_D3D(Real R, Real z) + { + Real A = sqrt(z * z + Z_d * Z_d); + Real B = R_d + A; + Real C = sqrt(R * R + B * B); + + // patel et al. 
2017, eqn 2 + return -GN * M_d / C; + }; + + Real rho_disk_D3D(const Real r, const Real z) + { + const Real a = R_d; + const Real c = Z_d; + const Real b = sqrt(z * z + c * c); + const Real d = a + b; + const Real s = r * r + d * d; + return M_d * c * c * (a * (d * d + r * r) + 3.0 * b * d * d) / (4.0 * M_PI * b * b * b * pow(s, 2.5)); + } + + /** + * Convenience method that returns the combined gravitational potential + * of the disk and halo. + */ + Real phi_total_D3D(Real R, Real z) { return phi_halo_D3D(R, z) + phi_disk_D3D(R, z); }; + + /** + * epicyclic frequency + */ + Real kappa2(Real R, Real z) + { + Real r = sqrt(R * R + z * z); + Real x = r / R_h; + Real C = GN * M_h / (R_h * log_func(c_vir)); + Real A = R_d + sqrt(z * z + Z_d * Z_d); + Real B = sqrt(R * R + A * A); + + Real phiH_prime = -C * R / (r * r) / (1 + x) + C * log(1 + x) * R_h * R / (r * r * r) + GN * M_d * R / (B * B * B); + Real phiH_prime_prime = -C / (r * r) / (1 + x) + 2 * C * R * R / (r * r * r * r) / (1 + x) + + C / ((1 + x) * (1 + x)) * R * R / R_h / (r * r * r) + + C * R * R / (1 + x) / (r * r * r * r) + + C * log(1 + x) * R_h / (r * r * r) * (1 - 3 * R * R / (r * r)) + + GN * M_d / (B * B * B) * (1 - 3 * R * R / (B * B)); + + return 3 / R * phiH_prime + phiH_prime_prime; + }; + + Real surface_density(Real R) { return M_d / (2 * M_PI) / (R_d * R_d) * exp(-R / R_d); }; + + Real sigma_crit(Real R) { return 3.36 * GN * surface_density(R) / sqrt(kappa2(R, 0.0)); }; + + Real getM_d() const { return M_d; }; + Real getR_d() const { return R_d; }; + Real getZ_d() const { return Z_d; }; + Real getM_vir() const { return M_vir; }; + Real getR_vir() const { return R_vir; }; + Real getC_vir() const { return c_vir; }; + Real getR_cool() const { return r_cool; }; }; -namespace Galaxies { - // all masses in M_sun and all distances in kpc - //static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); - static DiskGalaxy MW(6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); - static DiskGalaxy 
M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8/0.015, 10, 100.0); +class ClusteredDiskGalaxy : public DiskGalaxy +{ + private: + Real lower_cluster_mass, higher_cluster_mass; + Real normalization; + + public: + ClusteredDiskGalaxy(Real lm, Real hm, Real md, Real rd, Real zd, Real mvir, Real rvir, Real cvir, Real rcool) + : DiskGalaxy{md, rd, zd, mvir, rvir, cvir, rcool}, lower_cluster_mass{lm}, higher_cluster_mass{hm} + { + // if (lower_cluster_mass >= higher_cluster_mass) + normalization = 1 / log(higher_cluster_mass / lower_cluster_mass); + }; + + Real getLowerClusterMass() const { return lower_cluster_mass; } + Real getHigherClusterMass() const { return higher_cluster_mass; } + Real getNormalization() const { return normalization; } + + std::vector generateClusterPopulationMasses(int N, std::mt19937_64 generator) + { + std::vector population(N); + for (int i = 0; i < N; i++) { + population[singleClusterMass(generator)]; + } + return population; + } + + Real singleClusterMass(std::mt19937_64 generator) + { + std::uniform_real_distribution uniform_distro(0, 1); + return lower_cluster_mass * exp(uniform_distro(generator) / normalization); + } }; -#endif //DISK_GALAXY +namespace galaxies +{ +// all masses in M_sun and all distances in kpc +// static DiskGalaxy MW(6.5e10, 3.5, (3.5/5.0), 1.0e12, 261, 20, 157.0); +static ClusteredDiskGalaxy MW(1e4, 5e5, 6.5e10, 2.7, 0.7, 1.077e12, 261, 18, 157.0); +static DiskGalaxy M82(1.0e10, 0.8, 0.15, 5.0e10, 0.8 / 0.015, 10, 100.0); +}; // namespace galaxies + +#endif // DISK_GALAXY diff --git a/src/mpi/MPI_Comm_node.c b/src/mpi/MPI_Comm_node.c deleted file mode 100644 index 057233d8c..000000000 --- a/src/mpi/MPI_Comm_node.c +++ /dev/null @@ -1,67 +0,0 @@ -#ifdef MPI_CHOLLA -#include -#include -#include -#include "../mpi/MPI_Comm_node.h" - - -/*! \fn int djb2_hash(char *str) - * \brief Simple hash function by Dan Bernstein */ -int djb2_hash(char *str); - -/*! 
\fn MPI_Comm MPI_Comm_node(void) - * \brief Returns an MPI_Comm for processes on each node.*/ -MPI_Comm MPI_Comm_node(int *myid_node, int *nproc_node) -{ - int myid; //global rank - int nproc; //global rank - char pname[MPI_MAX_PROCESSOR_NAME]; //node hostname - int pname_length; //length of node hostname - int hash; //hash of node hostname - - MPI_Comm node_comm; //communicator for the procs on each node - - //get the global process rank - MPI_Comm_rank(MPI_COMM_WORLD,&myid); - MPI_Comm_size(MPI_COMM_WORLD,&nproc); - - - //if we're the only process, then just return - //the global rank, size, and comm - if(nproc==1) - { - *myid_node = myid; - *nproc_node = nproc; - return MPI_COMM_WORLD; - } - - //get the hostname of the node - MPI_Get_processor_name(pname, &pname_length); - - //hash the name of the node - hash = abs(djb2_hash(pname)); - - //printf("hash %d\n",hash); - - //split the communicator - MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm); - - //get size and rank - MPI_Comm_rank(node_comm,myid_node); - MPI_Comm_size(node_comm,nproc_node); - - //return the communicator for processors on the node - return node_comm; -} - -/*! 
\fn int djb2_hash(char *str) - * \brief Simple hash function by Dan Bernstein */ -int djb2_hash(char *str) -{ - int hash = 5381; - int c; - while((c = *str++)) - hash = ((hash<<5) + hash) + c; /*hash*33 + c*/ - return hash; -} -#endif /*MPI_CHOLLA*/ diff --git a/src/mpi/MPI_Comm_node.h b/src/mpi/MPI_Comm_node.h deleted file mode 100644 index 0d8820d02..000000000 --- a/src/mpi/MPI_Comm_node.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef MPI_COMM_NODE -#define MPI_COMM_NODE - -#include - -#ifdef __cplusplus -extern "C" { -#endif //__cplusplus -MPI_Comm MPI_Comm_node(int *pid, int *np); -#ifdef __cplusplus -} -#endif //__cplusplus - -#endif //MPI_COMM_NODE diff --git a/src/mpi/cuda_mpi_routines.cu b/src/mpi/cuda_mpi_routines.cu index 3b2e65e51..987d5fe67 100644 --- a/src/mpi/cuda_mpi_routines.cu +++ b/src/mpi/cuda_mpi_routines.cu @@ -1,8 +1,9 @@ #ifdef MPI_CHOLLA -#include -#include "../utils/gpu.hpp" -#include "../io/io.h" -#include "../mpi/cuda_mpi_routines.h" + #include + + #include "../io/io.h" + #include "../mpi/cuda_mpi_routines.h" + #include "../utils/gpu.hpp" // #define PRINT_DEVICE_IDS @@ -10,43 +11,52 @@ * \brief CUDA initialization within MPI. */ int initialize_cuda_mpi(int myid, int nprocs) { - int i_device = 0; //GPU device for this process - int n_device; //number of GPU devices available + int i_device = 0; // GPU device for this process + int n_device; // number of GPU devices available cudaError_t flag_error; - //get the number of cuda devices + // get the number of cuda devices flag_error = cudaGetDeviceCount(&n_device); - //check for errors - if(flag_error!=cudaSuccess) - { - if(flag_error==cudaErrorNoDevice) - fprintf(stderr,"cudaGetDeviceCount: Error! for myid = %d and n_device = %d; cudaErrorNoDevice\n",myid,n_device); - if(flag_error==cudaErrorInsufficientDriver) - fprintf(stderr,"cudaGetDeviceCount: Error! 
for myid = %d and n_device = %d; cudaErrorInsufficientDriver\n",myid,n_device); + // check for errors + if (flag_error != cudaSuccess) { + if (flag_error == cudaErrorNoDevice) { + fprintf(stderr, + "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; " + "cudaErrorNoDevice\n", + myid, n_device); + } + if (flag_error == cudaErrorInsufficientDriver) { + fprintf(stderr, + "cudaGetDeviceCount: Error! for myid = %d and n_device = %d; " + "cudaErrorInsufficientDriver\n", + myid, n_device); + } fflush(stderr); return 1; } - //get host name - char pname[MPI_MAX_PROCESSOR_NAME]; //node hostname - int pname_length; //length of node hostname + // get host name + char pname[MPI_MAX_PROCESSOR_NAME]; // node hostname + int pname_length; // length of node hostname MPI_Get_processor_name(pname, &pname_length); - //set a cuda device for each process - cudaSetDevice(myid%n_device); + // set a cuda device for each process + cudaSetDevice(myid % n_device); - //double check + // double check cudaGetDevice(&i_device); #ifdef PRINT_DEVICE_IDS - printf("In initialize_cuda_mpi: name:%s myid = %d, i_device = %d, n_device = %d\n",pname, myid,i_device,n_device); + printf( + "In initialize_cuda_mpi: name:%s myid = %d, i_device = %d, n_device = " + "%d\n", + pname, myid, i_device, n_device); fflush(stdout); MPI_Barrier(world); #endif return 0; - } -#endif //MPI_CHOLLA +#endif // MPI_CHOLLA diff --git a/src/mpi/cuda_mpi_routines.h b/src/mpi/cuda_mpi_routines.h index d408afae6..31b1f89a0 100644 --- a/src/mpi/cuda_mpi_routines.h +++ b/src/mpi/cuda_mpi_routines.h @@ -3,7 +3,7 @@ #ifdef __cplusplus extern "C" { -#endif //__cplusplus +#endif //__cplusplus /*! \fn int initialize_cuda_mpi(int myid, int nprocs); * \brief CUDA initialization within MPI. 
*/ @@ -11,7 +11,6 @@ int initialize_cuda_mpi(int myid, int nprocs); #ifdef __cplusplus } -#endif //__cplusplus +#endif //__cplusplus - -#endif //CUDA_MPI_ROUTINES +#endif // CUDA_MPI_ROUTINES diff --git a/src/mpi/mpi_routines.cpp b/src/mpi/mpi_routines.cpp index 6d51ee643..8f6c533e0 100644 --- a/src/mpi/mpi_routines.cpp +++ b/src/mpi/mpi_routines.cpp @@ -1,40 +1,40 @@ #ifdef MPI_CHOLLA -#include -#include -#include "../mpi/mpi_routines.h" -#include "../global/global.h" -#include "../utils/error_handling.h" -#include "../io/io.h" -#include "../mpi/cuda_mpi_routines.h" -#include "../mpi/MPI_Comm_node.h" -#include + #include "../mpi/mpi_routines.h" + + #include + #include + + #include + #include + + #include "../global/global.h" + #include "../io/io.h" + #include "../mpi/cuda_mpi_routines.h" + #include "../utils/error_handling.h" /*Global MPI Variables*/ -int procID; /*process rank*/ -int nproc; /*number of processes in global comm*/ -int root; /*rank of root process*/ +// note: some relevant global variables are declared in global.h int procID_node; /*process rank on node*/ int nproc_node; /*number of MPI processes on node*/ MPI_Comm world; /*global communicator*/ -MPI_Comm node; /*global communicator*/ MPI_Datatype MPI_CHREAL; /*set equal to MPI_FLOAT or MPI_DOUBLE*/ -#ifdef PARTICLES + #ifdef PARTICLES MPI_Datatype MPI_PART_INT; /*set equal to MPI_INT or MPI_LONG*/ -#endif + #endif -//MPI_Requests for nonblocking comm +// MPI_Requests for nonblocking comm MPI_Request *send_request; MPI_Request *recv_request; -//MPI destinations and sources +// MPI destinations and sources int dest[6]; int source[6]; -//Communication buffers +// Communication buffers // For BLOCK Real *d_send_buffer_x0; @@ -69,8 +69,8 @@ int x_buffer_length; int y_buffer_length; int z_buffer_length; -#ifdef PARTICLES -//Buffers for particles transfers + #ifdef PARTICLES +// Buffers for particles transfers Real *d_send_buffer_x0_particles; Real *d_send_buffer_x1_particles; Real 
*d_send_buffer_y0_particles; @@ -84,7 +84,7 @@ Real *d_recv_buffer_y1_particles; Real *d_recv_buffer_z0_particles; Real *d_recv_buffer_z1_particles; -//Buffers for particles transfers +// Buffers for particles transfers Real *h_send_buffer_x0_particles; Real *h_send_buffer_x1_particles; Real *h_send_buffer_y0_particles; @@ -118,7 +118,7 @@ MPI_Request *recv_request_n_particles; // Request for Particles Transfer MPI_Request *send_request_particles_transfer; MPI_Request *recv_request_particles_transfer; -#endif//PARTICLES + #endif // PARTICLES /*local domain sizes*/ /*none of these include ghost cells!*/ @@ -137,16 +137,14 @@ int nproc_x; int nproc_y; int nproc_z; -#ifdef FFTW + #ifdef FFTW ptrdiff_t n_local_complex; -#endif /*FFTW*/ - + #endif /*FFTW*/ /*\fn void InitializeChollaMPI(void) */ /* Routine to initialize MPI */ void InitializeChollaMPI(int *pargc, char **pargv[]) { - /*initialize MPI*/ MPI_Init(pargc, pargv); @@ -157,7 +155,7 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) MPI_Comm_size(MPI_COMM_WORLD, &nproc); /*print a cute message*/ - //printf("Processor %d of %d: Hello!\n", procID, nproc); + // printf("Processor %d of %d: Hello!\n", procID, nproc); /* set the root process rank */ root = 0; @@ -174,108 +172,111 @@ void InitializeChollaMPI(int *pargc, char **pargv[]) #endif /*PRECISION*/ #ifdef PARTICLES - #ifdef PARTICLES_LONG_INTS + #ifdef PARTICLES_LONG_INTS MPI_PART_INT = MPI_LONG; - #else + #else MPI_PART_INT = MPI_INT; - #endif + #endif #endif - /*create the MPI_Request arrays for non-blocking sends*/ - if(!(send_request = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + /*create the MPI_Request arrays for non-blocking sends. 
If the malloc fails then print an error and exit*/ + send_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!send_request) { chprintf("Error allocating send_request.\n"); chexit(-2); } - if(!(recv_request = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + recv_request = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!recv_request) { chprintf("Error allocating recv_request.\n"); chexit(-2); } #ifdef PARTICLES - if(!(send_request_n_particles = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { - chprintf("Error allocating send_request for number of particles for transfer.\n"); + send_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!send_request_n_particles) { + chprintf( + "Error allocating send_request for number of particles for " + "transfer.\n"); chexit(-2); } - if(!(recv_request_n_particles = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { - chprintf("Error allocating recv_request for number of particles for transfer.\n"); + recv_request_n_particles = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!recv_request_n_particles) { + chprintf( + "Error allocating recv_request for number of particles for " + "transfer.\n"); chexit(-2); } - - if(!(send_request_particles_transfer = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + send_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!send_request_particles_transfer) { chprintf("Error allocating send_request for particles transfer.\n"); chexit(-2); } - if(!(recv_request_particles_transfer = (MPI_Request *) malloc(2*sizeof(MPI_Request)))) - { + recv_request_particles_transfer = (MPI_Request *)malloc(2 * sizeof(MPI_Request)); + if (!recv_request_particles_transfer) { chprintf("Error allocating recv_request for particles transfer.\n"); chexit(-2); } #endif /*set up node communicator*/ - node = MPI_Comm_node(&procID_node, &nproc_node); + std::tie(procID_node, nproc_node) = MPI_Comm_node(); + // #ifdef ONLY_PARTICLES // 
chprintf("ONLY_PARTICLES: Initializing without CUDA support.\n"); // #else // #ifndef GRAVITY // // Needed to initialize cuda after gravity in order to work on Summit // //initialize cuda for use with mpi - #ifdef CUDA - if(initialize_cuda_mpi(procID_node,nproc_node)) - { + + if (initialize_cuda_mpi(procID_node, nproc_node)) { chprintf("Error initializing cuda with mpi.\n"); chexit(-10); } - #endif /*CUDA*/ // #endif//ONLY_PARTICLES - } - - /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) +void DomainDecomposition(struct Parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) { - DomainDecompositionBLOCK(P, H, nx_gin, ny_gin, nz_gin); // set grid dimensions - H->nx = nx_local+2*H->n_ghost; + H->nx = nx_local + 2 * H->n_ghost; H->nx_real = nx_local; - if (ny_local == 1) H->ny = 1; - else H->ny = ny_local+2*H->n_ghost; + if (ny_local == 1) { + H->ny = 1; + } else { + H->ny = ny_local + 2 * H->n_ghost; + } H->ny_real = ny_local; - if (nz_local == 1) H->nz = 1; - else H->nz = nz_local+2*H->n_ghost; + if (nz_local == 1) { + H->nz = 1; + } else { + H->nz = nz_local + 2 * H->n_ghost; + } H->nz_real = nz_local; // set total number of cells H->n_cells = H->nx * H->ny * H->nz; - //printf("In DomainDecomposition: nx %d ny %d nz %d nc %d\n",H->nx,H->ny,H->nz,H->n_cells); + // printf("In DomainDecomposition: nx %d ny %d nz %d nc + // %d\n",H->nx,H->ny,H->nz,H->n_cells); - //Allocate communication buffers + // Allocate communication buffers Allocate_MPI_DeviceBuffers(H); - } /* Perform domain decomposition */ -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) +void DomainDecompositionBLOCK(struct Parameters *P, struct Header *H, int nx_gin, int ny_gin, int nz_gin) { int n; - int i,j,k; + int i, j, k; int *ix; int *iy; int *iz; - //enforce an even number of processes - if(nproc%2 && nproc>1) - { + // enforce an even number of 
processes + if (nproc % 2 && nproc > 1) { chprintf("WARNING: Odd number of processors > 1 is not officially supported\n"); } @@ -285,19 +286,19 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin nz_global = nz_gin; /*allocate subdomain indices*/ - ix = (int *)malloc(nproc*sizeof(int)); - iy = (int *)malloc(nproc*sizeof(int)); - iz = (int *)malloc(nproc*sizeof(int)); + ix = (int *)malloc(nproc * sizeof(int)); + iy = (int *)malloc(nproc * sizeof(int)); + iz = (int *)malloc(nproc * sizeof(int)); /*tile the MPI processes in blocks*/ /*this sets nproc_x, nproc_y, nproc_z */ - //chprintf("About to enter tiling block decomp\n"); + // chprintf("About to enter tiling block decomp\n"); MPI_Barrier(world); TileBlockDecomposition(); if (nz_global > nx_global) { int tmp; - tmp = nproc_x; + tmp = nproc_x; nproc_x = nproc_z; nproc_z = tmp; } @@ -308,118 +309,118 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin nproc_y = P->n_proc_y; nproc_z = P->n_proc_z; chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", nproc_x, nproc_y, nproc_z); - // chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", P->n_proc_x, P->n_proc_y, P->n_proc_z); + // chprintf("Setting MPI grid: nx=%d ny=%d nz=%d\n", P->n_proc_x, + // P->n_proc_y, P->n_proc_z); #endif - //chprintf("Allocating tiling.\n"); + // chprintf("Allocating tiling.\n"); MPI_Barrier(world); - int ***tiling = three_dimensional_int_array(nproc_x,nproc_y,nproc_z); + int ***tiling = three_dimensional_int_array(nproc_x, nproc_y, nproc_z); - - //find indices - //chprintf("Setting indices.\n"); + // find indices + // chprintf("Setting indices.\n"); MPI_Barrier(world); n = 0; - //Gravity: Change the order of MPI processes assignment to match the assignment done by PFFT - //Original: - // for(i=0;i=nproc_x) + } + dest[1] = i + 1; + if (dest[1] >= nproc_x) { dest[1] -= nproc_x; + } - dest[2] = j-1; - if(dest[2]<0) + dest[2] = j - 1; + if (dest[2] < 0) { dest[2] += nproc_y; - dest[3] = 
j+1; - if(dest[3]>=nproc_y) + } + dest[3] = j + 1; + if (dest[3] >= nproc_y) { dest[3] -= nproc_y; + } - dest[4] = k-1; - if(dest[4]<0) + dest[4] = k - 1; + if (dest[4] < 0) { dest[4] += nproc_z; - dest[5] = k+1; - if(dest[5]>=nproc_z) + } + dest[5] = k + 1; + if (dest[5] >= nproc_z) { dest[5] -= nproc_z; + } } n++; } + } + } /* set local x, y, z subdomain sizes */ - n = nx_global%nproc_x; - if(!n) - { - //nx_global splits evenly along x procs*/ - nx_local = nx_global/nproc_x; - nx_local_start = ix[procID]*nx_local; - }else{ - nx_local = nx_global/nproc_x; - if(ix[procID]xu_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->xl_bcnd==1) P->xl_bcnd = 5; - }else{ + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->xl_bcnd == 1) { + P->xl_bcnd = 5; + } + } else { P->xl_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->xu_bcnd==1) P->xu_bcnd = 5; + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->xu_bcnd == 1) { + P->xu_bcnd = 5; + } } - }else{ - //this is completely an interior cell - //along the x direction, so - //set both x bcnds to MPI bcnds + } else { + // this is completely an interior cell + // along the x direction, so + // set both x bcnds to MPI bcnds P->xl_bcnd = 5; P->xu_bcnd = 5; } @@ -478,23 +480,25 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin /*do y bcnds next*/ /*exterior faces have to be treated separately*/ /*as long as there is more than one cell in the x direction*/ - if (nproc_y!=1) { - if((iy[procID]==0)||(iy[procID]==nproc_y-1)) - { - if(iy[procID]==0) - { + if (nproc_y != 1) { + if ((iy[procID] == 0) || (iy[procID] == nproc_y - 1)) { + if (iy[procID] == 0) { P->yu_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->yl_bcnd==1) P->yl_bcnd = 5; - }else{ + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->yl_bcnd == 1) { + P->yl_bcnd = 5; + } + } else { P->yl_bcnd = 5; - //if the global 
bcnd is periodic, use MPI bcnds at ends - if(P->yu_bcnd==1) P->yu_bcnd = 5; + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->yu_bcnd == 1) { + P->yu_bcnd = 5; + } } - }else{ - //this is completely an interior cell - //along the y direction, so - //set both y bcnds to MPI bcnds + } else { + // this is completely an interior cell + // along the y direction, so + // set both y bcnds to MPI bcnds P->yl_bcnd = 5; P->yu_bcnd = 5; } @@ -503,55 +507,54 @@ void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_gin /*do z bcnds next*/ /*exterior faces have to be treated separately*/ /*as long as there is more than one cell in the x direction*/ - if(nproc_z!=1) { - if((iz[procID]==0)||(iz[procID]==nproc_z-1)) - { - if(iz[procID]==0) - { + if (nproc_z != 1) { + if ((iz[procID] == 0) || (iz[procID] == nproc_z - 1)) { + if (iz[procID] == 0) { P->zu_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->zl_bcnd==1) P->zl_bcnd = 5; - }else{ + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->zl_bcnd == 1) { + P->zl_bcnd = 5; + } + } else { P->zl_bcnd = 5; - //if the global bcnd is periodic, use MPI bcnds at ends - if(P->zu_bcnd==1) P->zu_bcnd = 5; + // if the global bcnd is periodic, use MPI bcnds at ends + if (P->zu_bcnd == 1) { + P->zu_bcnd = 5; + } } - }else{ - //this is completely an interior cell - //along the z direction, so - //set both z bcnds to MPI bcnds + } else { + // this is completely an interior cell + // along the z direction, so + // set both z bcnds to MPI bcnds P->zl_bcnd = 5; P->zu_bcnd = 5; } } - - //free indices + // free indices free(ix); free(iy); free(iz); - } void Allocate_MPI_DeviceBuffers(struct Header *H) { - int xbsize, ybsize, zbsize; - if (H->ny==1 && H->nz==1) { - xbsize = H->n_fields*H->n_ghost; - ybsize = 1; - zbsize = 1; + int xbsize = 1, ybsize = 1, zbsize = 1; + if (H->ny == 1 && H->nz == 1) { + xbsize = H->n_fields * H->n_ghost; } // 2D - if (H->ny>1 && H->nz==1) { 
- xbsize = H->n_fields*H->n_ghost*(H->ny-2*H->n_ghost); - ybsize = H->n_fields*H->n_ghost*(H->nx); - zbsize = 1; + else if (H->ny > 1 && H->nz == 1) { + xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost); + ybsize = H->n_fields * H->n_ghost * (H->nx); } // 3D - if (H->ny>1 && H->nz>1) { - xbsize = H->n_fields*H->n_ghost*(H->ny-2*H->n_ghost)*(H->nz-2*H->n_ghost); - ybsize = H->n_fields*H->n_ghost*(H->nx)*(H->nz-2*H->n_ghost); - zbsize = H->n_fields*H->n_ghost*(H->nx)*(H->ny); + else if (H->ny > 1 && H->nz > 1) { + xbsize = H->n_fields * H->n_ghost * (H->ny - 2 * H->n_ghost) * (H->nz - 2 * H->n_ghost); + ybsize = H->n_fields * H->n_ghost * (H->nx) * (H->nz - 2 * H->n_ghost); + zbsize = H->n_fields * H->n_ghost * (H->nx) * (H->ny); + } else { + throw std::runtime_error("MPI buffer size failed to set."); } x_buffer_length = xbsize; @@ -560,22 +563,22 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) #ifdef PARTICLES // Set Initial sizes for particles buffers - int n_max = std::max( H->nx, H->ny ); - n_max = std::max( H->nz, n_max ); - int factor = 2; - N_PARTICLES_TRANSFER = n_max * n_max * factor ; + int n_max = std::max(H->nx, H->ny); + n_max = std::max(H->nz, n_max); + int factor = 2; + N_PARTICLES_TRANSFER = n_max * n_max * factor; // Set the number of values that will be transferred for each particle - N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities - #ifndef SINGLE_PARTICLE_MASS - N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle mass - #endif - #ifdef PARTICLE_IDS - N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle ID - #endif - #ifdef PARTICLE_AGE - N_DATA_PER_PARTICLE_TRANSFER += 1; //one more for the particle age - #endif + N_DATA_PER_PARTICLE_TRANSFER = 6; // 3 positions and 3 velocities + #ifndef SINGLE_PARTICLE_MASS + N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle mass + #endif + #ifdef PARTICLE_IDS + N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle ID + #endif + 
#ifdef PARTICLE_AGE + N_DATA_PER_PARTICLE_TRANSFER += 1; // one more for the particle age + #endif buffer_length_particles_x0_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; buffer_length_particles_x0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; @@ -589,84 +592,90 @@ void Allocate_MPI_DeviceBuffers(struct Header *H) buffer_length_particles_z0_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; buffer_length_particles_z1_send = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; buffer_length_particles_z1_recv = N_PARTICLES_TRANSFER * N_DATA_PER_PARTICLE_TRANSFER; - #endif //PARTICLES + #endif // PARTICLES chprintf("Allocating MPI communication buffers on GPU "); chprintf("(nx = %ld, ny = %ld, nz = %ld).\n", xbsize, ybsize, zbsize); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x0, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x1, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x0, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x1, xbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y0, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y1, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y0, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1, ybsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z0, zbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z1, zbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0, zbsize*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1, zbsize*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x0, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x1, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x0, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x1, xbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y0, ybsize * 
sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y1, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y0, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y1, ybsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z0, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z1, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z0, zbsize * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z1, zbsize * sizeof(Real))); #if !defined(MPI_GPU) - h_send_buffer_x0 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_send_buffer_x1 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_recv_buffer_x0 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_recv_buffer_x1 = (Real *) malloc ( xbsize*sizeof(Real) ); - h_send_buffer_y0 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_send_buffer_y1 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_recv_buffer_y0 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_recv_buffer_y1 = (Real *) malloc ( ybsize*sizeof(Real) ); - h_send_buffer_z0 = (Real *) malloc ( zbsize*sizeof(Real) ); - h_send_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) ); - h_recv_buffer_z0 = (Real *) malloc ( zbsize*sizeof(Real) ); - h_recv_buffer_z1 = (Real *) malloc ( zbsize*sizeof(Real) ); + h_send_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real)); + h_send_buffer_x1 = (Real *)malloc(xbsize * sizeof(Real)); + h_recv_buffer_x0 = (Real *)malloc(xbsize * sizeof(Real)); + h_recv_buffer_x1 = (Real *)malloc(xbsize * sizeof(Real)); + h_send_buffer_y0 = (Real *)malloc(ybsize * sizeof(Real)); + h_send_buffer_y1 = (Real *)malloc(ybsize * sizeof(Real)); + h_recv_buffer_y0 = (Real *)malloc(ybsize * sizeof(Real)); + h_recv_buffer_y1 = (Real *)malloc(ybsize * sizeof(Real)); + h_send_buffer_z0 = (Real *)malloc(zbsize * sizeof(Real)); + h_send_buffer_z1 = (Real *)malloc(zbsize * sizeof(Real)); + h_recv_buffer_z0 = (Real *)malloc(zbsize * sizeof(Real)); + h_recv_buffer_z1 = (Real *)malloc(zbsize * 
sizeof(Real)); #endif - // NOTE: When changing this ifdef check for compatibility with + // NOTE: When changing this ifdef check for compatibility with // Grid3D::Load_NTtransfer_and_Request_Receive_Particles_Transfer // in particles/particles_boundaries.cpp - // Whether or not MPI_GPU is on, the device has transfer buffers for PARTICLES_GPU + // Whether or not MPI_GPU is on, the device has transfer buffers for + // PARTICLES_GPU #if defined(PARTICLES) && defined(PARTICLES_GPU) - chprintf("Allocating MPI communication buffers on GPU for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x0_particles, buffer_length_particles_x0_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_x1_particles, buffer_length_particles_x1_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y0_particles, buffer_length_particles_y0_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_y1_particles, buffer_length_particles_y1_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z0_particles, buffer_length_particles_z0_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_send_buffer_z1_particles, buffer_length_particles_z1_send*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv*sizeof(Real)) ); - CudaSafeCall ( cudaMalloc (&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv*sizeof(Real)) ); - #endif // PARTICLES && PARTICLES_GPU + chprintf( + "Allocating MPI communication buffers on GPU for 
particle transfers ( " + "N_Particles: %d ).\n", + N_PARTICLES_TRANSFER); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x0_particles, buffer_length_particles_x0_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_x1_particles, buffer_length_particles_x1_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y0_particles, buffer_length_particles_y0_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_y1_particles, buffer_length_particles_y1_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z0_particles, buffer_length_particles_z0_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_send_buffer_z1_particles, buffer_length_particles_z1_send * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real))); + #endif // PARTICLES && PARTICLES_GPU // CPU relies on host buffers, GPU without MPI_GPU relies on host buffers #ifdef PARTICLES - #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) - chprintf("Allocating MPI communication buffers on Host for particle transfers ( N_Particles: %d ).\n", N_PARTICLES_TRANSFER ); - h_send_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_send*sizeof(Real) ); - h_send_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_send*sizeof(Real) ); - h_send_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_send*sizeof(Real) ); - 
h_send_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_send*sizeof(Real) ); - h_send_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_send*sizeof(Real) ); - h_send_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_send*sizeof(Real) ); - h_recv_buffer_x0_particles = (Real *) malloc ( buffer_length_particles_x0_recv*sizeof(Real) ); - h_recv_buffer_x1_particles = (Real *) malloc ( buffer_length_particles_x1_recv*sizeof(Real) ); - h_recv_buffer_y0_particles = (Real *) malloc ( buffer_length_particles_y0_recv*sizeof(Real) ); - h_recv_buffer_y1_particles = (Real *) malloc ( buffer_length_particles_y1_recv*sizeof(Real) ); - h_recv_buffer_z0_particles = (Real *) malloc ( buffer_length_particles_z0_recv*sizeof(Real) ); - h_recv_buffer_z1_particles = (Real *) malloc ( buffer_length_particles_z1_recv*sizeof(Real) ); - #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) - #endif //PARTICLES - + #if (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || defined(PARTICLES_CPU) + chprintf( + "Allocating MPI communication buffers on Host for particle transfers ( " + "N_Particles: %d ).\n", + N_PARTICLES_TRANSFER); + h_send_buffer_x0_particles = (Real *)malloc(buffer_length_particles_x0_send * sizeof(Real)); + h_send_buffer_x1_particles = (Real *)malloc(buffer_length_particles_x1_send * sizeof(Real)); + h_send_buffer_y0_particles = (Real *)malloc(buffer_length_particles_y0_send * sizeof(Real)); + h_send_buffer_y1_particles = (Real *)malloc(buffer_length_particles_y1_send * sizeof(Real)); + h_send_buffer_z0_particles = (Real *)malloc(buffer_length_particles_z0_send * sizeof(Real)); + h_send_buffer_z1_particles = (Real *)malloc(buffer_length_particles_z1_send * sizeof(Real)); + h_recv_buffer_x0_particles = (Real *)malloc(buffer_length_particles_x0_recv * sizeof(Real)); + h_recv_buffer_x1_particles = (Real *)malloc(buffer_length_particles_x1_recv * sizeof(Real)); + h_recv_buffer_y0_particles = (Real 
*)malloc(buffer_length_particles_y0_recv * sizeof(Real)); + h_recv_buffer_y1_particles = (Real *)malloc(buffer_length_particles_y1_recv * sizeof(Real)); + h_recv_buffer_z0_particles = (Real *)malloc(buffer_length_particles_z0_recv * sizeof(Real)); + h_recv_buffer_z1_particles = (Real *)malloc(buffer_length_particles_z1_recv * sizeof(Real)); + #endif // (defined(PARTICLES_GPU) && !defined(MPI_GPU)) || + // defined(PARTICLES_CPU) + #endif // PARTICLES } - /* MPI reduction wrapper for max(Real)*/ Real ReduceRealMax(Real x) { @@ -675,11 +684,10 @@ Real ReduceRealMax(Real x) Real y; MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_MAX, world); - y = (Real) out; + y = (Real)out; return y; } - /* MPI reduction wrapper for min(Real)*/ Real ReduceRealMin(Real x) { @@ -688,11 +696,10 @@ Real ReduceRealMin(Real x) Real y; MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_MIN, world); - y = (Real) out; + y = (Real)out; return y; } - /* MPI reduction wrapper for avg(Real)*/ Real ReduceRealAvg(Real x) { @@ -701,11 +708,34 @@ Real ReduceRealAvg(Real x) Real y; MPI_Allreduce(&in, &out, 1, MPI_CHREAL, MPI_SUM, world); - y = (Real) out / nproc; + y = (Real)out / nproc; return y; } -#ifdef PARTICLES +size_t Reduce_size_t_Max(size_t in) +{ + // Get the right MPI type + #if SIZE_MAX == UCHAR_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR + #elif SIZE_MAX == USHRT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_SHORT + #elif SIZE_MAX == UINT_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED + #elif SIZE_MAX == ULONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG + #elif SIZE_MAX == ULLONG_MAX + #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG + #else + #error "Error: Type of size_t not supported by Reduce_size_t_Max" + #endif + + // Perform the reduction + size_t out; + MPI_Allreduce(&in, &out, 1, my_MPI_SIZE_T, MPI_MAX, world); + return out; +} + + #ifdef PARTICLES /* MPI reduction wrapper for sum(part_int)*/ Real ReducePartIntSum(part_int_t x) { @@ -713,29 +743,31 @@ Real ReducePartIntSum(part_int_t x) part_int_t 
out; part_int_t y; - #ifdef PARTICLES_LONG_INTS + #ifdef PARTICLES_LONG_INTS MPI_Allreduce(&in, &out, 1, MPI_LONG, MPI_SUM, world); - #else + #else MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_SUM, world); - #endif - y = (part_int_t) out ; + #endif + y = (part_int_t)out; return y; } - // Count the particles in the MPI ranks lower than this rank (procID) to get a // global offset for the local IDs. -part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ){ +part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local) +{ part_int_t global_offset; part_int_t *n_local_all, *n_local_send; - n_local_send = (part_int_t *) malloc( 1*sizeof(part_int_t) ); - n_local_all = (part_int_t *) malloc( nproc*sizeof(part_int_t) ); + n_local_send = (part_int_t *)malloc(1 * sizeof(part_int_t)); + n_local_all = (part_int_t *)malloc(nproc * sizeof(part_int_t)); n_local_send[0] = n_local; - MPI_Allgather( n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world ); + MPI_Allgather(n_local_send, 1, MPI_PART_INT, n_local_all, 1, MPI_PART_INT, world); global_offset = 0; - for (int other_rank=0; other_rank 1) { + int gpf = greatest_prime_factor(number); + number /= gpf; + dims[index % 3] *= gpf; + index += 1; + } + np_x = dims[0]; + np_y = dims[1]; + np_z = dims[2]; +} + /*tile MPI processes in a block arrangement*/ void TileBlockDecomposition(void) { int n_gpf; - //initialize np_x, np_y, np_z + // initialize np_x, np_y, np_z int np_x = 1; int np_y = 1; int np_z = 1; - //printf("nproc %d n_gpf %d\n",nproc,n_gpf); + // printf("nproc %d n_gpf %d\n",nproc,n_gpf); /* 1-D case is trivial */ - if (nz_global==1 && ny_global==1) { + if (nz_global == 1 && ny_global == 1) { nproc_x = nproc; nproc_y = 1; nproc_z = 1; @@ -849,9 +889,9 @@ void TileBlockDecomposition(void) /* 2-D case we can just assign domain*/ - if (nz_global==1) { + if (nz_global == 1) { np_x = n_gpf; - np_y = nproc/np_x; + np_y = nproc / np_x; // ensure nproc_x > nproc_y if (np_x < np_y) { nproc_x = np_y; @@ 
-864,73 +904,32 @@ void TileBlockDecomposition(void) return; } - /*base decomposition on whether n_gpf==2*/ - if(n_gpf!=2) { - /*we are in 3-d, so split remainder evenly*/ - np_x = n_gpf; - n_gpf = greatest_prime_factor(nproc/n_gpf); - if(n_gpf!=2) { - /*the next greatest prime is odd, so just split*/ - np_y = n_gpf; - np_z = nproc/(np_x*np_y); - } else { - /*increase ny, nz round-robin*/ - while(np_x*np_y*np_z < nproc) - { - np_y*=2; - if(np_x*np_y*np_z==nproc) - break; - np_z*=2; - } + TileBlockDecomposition3D(nproc, np_x, np_y, np_z); + // reorder x, y, z - } - } else { - /*nproc is a power of 2*/ - /*we are in 3-d, so split remainder evenly*/ - - /*increase nx, ny, nz round-robin*/ - while(np_x*np_y*np_z < nproc) - { - np_x*=2; - if(np_x*np_y*np_z==nproc) - break; - np_y*=2; - if(np_x*np_y*np_z==nproc) - break; - np_z*=2; - } + int n_tmp; + if (np_z > np_y) { + n_tmp = np_y; + np_y = np_z; + np_z = n_tmp; + } + if (np_y > np_x) { + n_tmp = np_x; + np_x = np_y; + np_y = n_tmp; + } + if (np_z > np_y) { + n_tmp = np_y; + np_y = np_z; + np_z = n_tmp; } - - - //reorder x, y, z - int n_tmp; - if(np_z>np_y) - { - n_tmp = np_y; - np_y = np_z; - np_z = n_tmp; - } - if(np_y>np_x) - { - n_tmp = np_x; - np_x = np_y; - np_y = n_tmp; - } - if(np_z>np_y) - { - n_tmp = np_y; - np_y = np_z; - np_z = n_tmp; - } - - //save result + // save result nproc_x = np_x; nproc_y = np_y; nproc_z = np_z; } - /*! 
\fn int ***three_dimensional_int_array(int n, int l, int m) * * \brief Allocate a three dimensional (n x l x m) int array * */ @@ -939,60 +938,88 @@ int ***three_dimensional_int_array(int n, int l, int m) int ***x; x = new int **[n]; - for(int i=0;i MPI_Comm_node() { + // get the global process rank + int myid, nproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &nproc); - int xbsize = x_buffer_length, - ybsize = y_buffer_length, - zbsize = z_buffer_length; - - switch ( direction ) { - case ( 0 ): cudaMemcpy(d_recv_buffer_x0, h_recv_buffer_x0, - xbsize*sizeof(Real), cudaMemcpyHostToDevice); - break; - case ( 1 ): cudaMemcpy(d_recv_buffer_x1, h_recv_buffer_x1, - xbsize*sizeof(Real), cudaMemcpyHostToDevice); - break; - case ( 2 ): cudaMemcpy(d_recv_buffer_y0, h_recv_buffer_y0, - ybsize*sizeof(Real), cudaMemcpyHostToDevice); - break; - case ( 3 ): cudaMemcpy(d_recv_buffer_y1, h_recv_buffer_y1, - ybsize*sizeof(Real), cudaMemcpyHostToDevice); - break; - case ( 4 ): cudaMemcpy(d_recv_buffer_z0, h_recv_buffer_z0, - zbsize*sizeof(Real), cudaMemcpyHostToDevice); - break; - case ( 5 ): cudaMemcpy(d_recv_buffer_z1, h_recv_buffer_z1, - zbsize*sizeof(Real), cudaMemcpyHostToDevice); - break; + // if there is the only one process, then just return the global rank and size + if (nproc == 1) { + return {myid, nproc}; } + // get the hostname of the node + std::string pname; // node hostname + pname.resize(MPI_MAX_PROCESSOR_NAME); + int pname_length; // length of node hostname + + MPI_Get_processor_name(pname.data(), &pname_length); + + // hash the name of the node. 
MPI_Comm_split doesn't like negative numbers and accepts ints not unsigned ints so we + // need to take the absolute value + int const hash = std::abs(static_cast(std::hash{}(pname))); + + // split the communicator + MPI_Comm node_comm; // communicator for the procs on each node + MPI_Comm_split(MPI_COMM_WORLD, hash, myid, &node_comm); + + // get size and rank + MPI_Comm_rank(node_comm, &myid); + MPI_Comm_size(node_comm, &nproc); + + return {myid, nproc}; } #endif /*MPI_CHOLLA*/ diff --git a/src/mpi/mpi_routines.h b/src/mpi/mpi_routines.h index b94e8595c..913b5e36a 100644 --- a/src/mpi/mpi_routines.h +++ b/src/mpi/mpi_routines.h @@ -1,42 +1,45 @@ #ifdef MPI_CHOLLA -#ifndef MPI_ROUTINES_H -#define MPI_ROUTINES_H -#include -#include -#include "../grid/grid3D.h" -#include "../global/global.h" - -#ifdef FFTW -#include "fftw3.h" -#include "fftw3-mpi.h" -#endif /*FFTW*/ + #ifndef MPI_ROUTINES_H + #define MPI_ROUTINES_H + #include + #include + + #include + + #include "../global/global.h" + #include "../grid/grid3D.h" + + #ifdef FFTW + #include "fftw3-mpi.h" + #include "fftw3.h" + #endif /*FFTW*/ /*Global MPI Variables*/ -extern int procID; /*process rank*/ -extern int nproc; /*number of processes in global comm*/ -extern int root; /*rank of root process*/ +// NOTE: some variable heavily used by mpi are declared in global.h so that they are defined even +// when compiled without mpi + extern int procID_node; /*process rank on node*/ extern int nproc_node; /*number of MPI processes on node*/ -extern MPI_Comm world; /*global communicator*/ -extern MPI_Comm node; /*communicator for each node*/ +extern MPI_Comm world; /*global communicator*/ +extern MPI_Comm node; /*communicator for each node*/ extern MPI_Datatype MPI_CHREAL; /*data type describing float precision*/ -#ifdef PARTICLES + #ifdef PARTICLES extern MPI_Datatype MPI_PART_INT; /*data type describing interger for particles precision*/ -#endif + #endif -//extern MPI_Request send_request[6]; -//extern MPI_Request 
recv_request[6]; +// extern MPI_Request send_request[6]; +// extern MPI_Request recv_request[6]; extern MPI_Request *send_request; extern MPI_Request *recv_request; -//MPI destinations and sources +// MPI destinations and sources extern int dest[6]; extern int source[6]; -//Communication buffers +// Communication buffers // For BLOCK extern Real *d_send_buffer_x0; @@ -65,8 +68,8 @@ extern Real *h_recv_buffer_y1; extern Real *h_recv_buffer_z0; extern Real *h_recv_buffer_z1; -#ifdef PARTICLES -//Buffers for particles transfers + #ifdef PARTICLES +// Buffers for particles transfers extern Real *d_send_buffer_x0_particles; extern Real *d_send_buffer_x1_particles; extern Real *d_send_buffer_y0_particles; @@ -113,8 +116,7 @@ extern MPI_Request *recv_request_n_particles; // Request for Particles Transfer extern MPI_Request *send_request_particles_transfer; extern MPI_Request *recv_request_particles_transfer; -#endif//PARTICLES - + #endif // PARTICLES extern int send_buffer_length; extern int recv_buffer_length; @@ -134,9 +136,9 @@ extern ptrdiff_t nx_local_start; extern ptrdiff_t ny_local_start; extern ptrdiff_t nz_local_start; -#ifdef FFTW + #ifdef FFTW extern ptrdiff_t n_local_complex; -#endif /*FFTW*/ + #endif /*FFTW*/ /*number of MPI procs in each dimension*/ extern int nproc_x; @@ -148,9 +150,9 @@ extern int nproc_z; void InitializeChollaMPI(int *pargc, char **pargv[]); /* Perform domain decomposition */ -void DomainDecomposition(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); +void DomainDecomposition(struct Parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); -void DomainDecompositionBLOCK(struct parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); +void DomainDecompositionBLOCK(struct Parameters *P, struct Header *H, int nx_global, int ny_global, int nz_global); /*tile MPI processes in a block decomposition*/ void TileBlockDecomposition(void); @@ -164,17 +166,26 @@ Real 
ReduceRealMin(Real x); /* MPI reduction wrapper for avg(Real)*/ Real ReduceRealAvg(Real x); -#ifdef PARTICLES +/*! + * \brief MPI reduction wrapper to find the maximum of a size_t variable + * + * \param in The rank-local value to be reduced + * \return size_t The global reduced value + */ +size_t Reduce_size_t_Max(size_t in); + + #ifdef PARTICLES /* MPI reduction wrapper for sum(part_int)*/ Real ReducePartIntSum(part_int_t x); -// Count the particles in the MPI ranks lower that this rank to get a global offset for the local IDs. -part_int_t Get_Particles_IDs_Global_MPI_Offset( part_int_t n_local ); +// Count the particles in the MPI ranks lower that this rank to get a global +// offset for the local IDs. +part_int_t Get_Particles_IDs_Global_MPI_Offset(part_int_t n_local); -// Function that checks if the buffer size For the particles transfer is large enough, -// and grows the buffer if needed. -void Check_and_Grow_Particles_Buffer( Real **part_buffer, int *current_size_ptr, int new_size ); -#endif +// Function that checks if the buffer size For the particles transfer is large +// enough, and grows the buffer if needed. +void Check_and_Grow_Particles_Buffer(Real **part_buffer, int *current_size_ptr, int new_size); + #endif /* Print information about the domain properties */ void Print_Domain_Properties(struct Header H); @@ -185,19 +196,25 @@ void Allocate_MPI_DeviceBuffers(struct Header *H); /* find the greatest prime factor of an integer */ int greatest_prime_factor(int n); - /*! \fn int ***three_dimensional_int_array(int n, int l, int m) * * \brief Allocate a three dimensional (n x l x m) int array * */ int ***three_dimensional_int_array(int n, int l, int m); -/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int m) - * \brief De-allocate a three dimensional (n x l x m) int array. +/*! \fn void deallocate_three_int_dimensional_array(int ***x, int n, int l, int + * m) \brief De-allocate a three dimensional (n x l x m) int array. 
* */ void deallocate_three_dimensional_int_array(int ***x, int n, int l, int m); /* Copy MPI receive buffers on Host to their device locations */ -void copyHostToDeviceReceiveBuffer ( int direction ); +void copyHostToDeviceReceiveBuffer(int direction); + +/*! + * \brief Split the communicator for each node and return IDs + * + * \return std::pair The rank id and total number of processes + */ +std::pair MPI_Comm_node(); -#endif /*MPI_ROUTINES_H*/ -#endif /*MPI_CHOLLA*/ + #endif /*MPI_ROUTINES_H*/ +#endif /*MPI_CHOLLA*/ diff --git a/src/particles/density_CIC.cpp b/src/particles/density_CIC.cpp index c907e64eb..428a0e864 100644 --- a/src/particles/density_CIC.cpp +++ b/src/particles/density_CIC.cpp @@ -1,41 +1,40 @@ #ifdef PARTICLES -#include -#include -#include "math.h" -#include -#include "../global/global.h" -#include "../particles/particles_3D.h" -#include "../grid/grid3D.h" -#include "../io/io.h" - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include + #include + #include + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../particles/particles_3D.h" + #include "math.h" -//Get the particles Cloud-In-Cell interpolated density -void Particles_3D::Get_Density_CIC(){ + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +// Get the particles Cloud-In-Cell interpolated density +void Particles3D::Get_Density_CIC() +{ #ifdef PARTICLES_CPU - #ifdef PARALLEL_OMP + #ifdef PARALLEL_OMP Get_Density_CIC_OMP(); - #else + #else Get_Density_CIC_Serial(); - #endif //PARALLEL_OMP + #endif // PARALLEL_OMP #endif #ifdef PARTICLES_GPU Get_Density_CIC_GPU(); #endif - } - -//Compute the particles density and copy it to the array in Grav to compute the potential -void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){ - +// Compute the particles density and copy it to the array in Grav to compute the +// potential +void Grid3D::Copy_Particles_Density_to_Gravity(struct Parameters P) +{ 
#ifdef CPU_TIME Timer.Part_Density.Start(); #endif @@ -54,53 +53,53 @@ void Grid3D::Copy_Particles_Density_to_Gravity(struct parameters P){ // Step 2: Transfer Particles CIC density Boundaries Transfer_Particles_Density_Boundaries(P); - //Step 3: Copy Particles density to Gravity array + // Step 3: Copy Particles density to Gravity array Copy_Particles_Density(); #ifdef CPU_TIME Timer.Part_Dens_Transf.End(); #endif - - } -//Copy the particles density to the density array in Grav to compute the potential -void Grid3D::Copy_Particles_Density(){ - +// Copy the particles density to the density array in Grav to compute the +// potential +void Grid3D::Copy_Particles_Density() +{ #ifdef GRAVITY_GPU - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU Copy_Particles_Density_to_GPU(); - #endif + #endif Copy_Particles_Density_GPU(); #else - #ifndef PARALLEL_OMP - Copy_Particles_Density_function( 0, Grav.nz_local ); - #else + #ifndef PARALLEL_OMP + Copy_Particles_Density_function(0, Grav.nz_local); + #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(Grav.nz_local, n_omp_procs, omp_id, &g_start, &g_end); - Copy_Particles_Density_function( g_start, g_end ); + Copy_Particles_Density_function(g_start, g_end); } - #endif//PARALLEL_OMP + #endif // PARALLEL_OMP - #endif//GRAVITY_GPU + #endif // GRAVITY_GPU } -void Grid3D::Copy_Particles_Density_function( int g_start, int g_end ){ +void Grid3D::Copy_Particles_Density_function(int g_start, int g_end) +{ int nx_part, ny_part, nz_part, nGHST; - nGHST = Particles.G.n_ghost_particles_grid; - nx_part = Particles.G.nx_local + 2*nGHST; - ny_part = Particles.G.ny_local + 2*nGHST; - nz_part = Particles.G.nz_local + 2*nGHST; + nGHST = 
Particles.G.n_ghost_particles_grid; + nx_part = Particles.G.nx_local + 2 * nGHST; + ny_part = Particles.G.ny_local + 2 * nGHST; + nz_part = Particles.G.nz_local + 2 * nGHST; int nx_dens, ny_dens, nz_dens; nx_dens = Grav.nx_local; @@ -108,23 +107,22 @@ void Grid3D::Copy_Particles_Density_function( int g_start, int g_end ){ nz_dens = Grav.nz_local; int i, j, k, id_CIC, id_grid; - for ( k=g_start; k nx_g-3 ) ignore = true; - if ( indx_y > ny_g-3 ) ignore = true; - if ( indx_y > nz_g-3 ) ignore = true; - if ( x_pos < G.xMin || x_pos >= G.xMax ) in_local = false; - if ( y_pos < G.yMin || y_pos >= G.yMax ) in_local = false; - if ( z_pos < G.zMin || z_pos >= G.zMax ) in_local = false; - if ( ! in_local ) { + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z); + if (indx_x < -1) ignore = true; + if (indx_y < -1) ignore = true; + if (indx_z < -1) ignore = true; + if (indx_x > nx_g - 3) ignore = true; + if (indx_y > ny_g - 3) ignore = true; + if (indx_y > nz_g - 3) ignore = true; + if (x_pos < G.xMin || x_pos >= G.xMax) in_local = false; + if (y_pos < G.yMin || y_pos >= G.yMax) in_local = false; + if (z_pos < G.zMin || z_pos >= G.zMax) in_local = false; + if (!in_local) { std::cout << " Density CIC Error:" << std::endl; - #ifdef PARTICLE_IDS + #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] << std::endl; - #else + #else std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + #endif + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << x_pos << std::endl; std::cout << " Particle Y: " << 
y_pos << std::endl; std::cout << " Particle Z: " << z_pos << std::endl; - continue; + continue; } - if ( ignore ){ - #ifdef PARTICLE_IDS + if (ignore) { + #ifdef PARTICLE_IDS std::cout << "ERROR Density CIC Index pID: " << partIDs[pIndx] << std::endl; - #else + #else std::cout << "ERROR Density CIC Index " << std::endl; - #endif + #endif std::cout << "Negative xIndx: " << x_pos << " " << indx_x << std::endl; std::cout << "Negative zIndx: " << z_pos << " " << indx_z << std::endl; std::cout << "Negative yIndx: " << y_pos << " " << indx_y << std::endl; @@ -235,75 +231,71 @@ void Particles_3D::Get_Density_CIC_Serial( ){ // exit(-1); continue; } - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( x_pos - cell_center_x ) / dx; - delta_y = 1 - ( y_pos - cell_center_y ) / dy; - delta_z = 1 - ( z_pos - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (x_pos - cell_center_x) / dx; + delta_y = 1 - (y_pos - cell_center_y) / dy; + delta_z = 1 - (z_pos - cell_center_z) / dz; indx_x += nGHST; indx_y += nGHST; indx_z += nGHST; - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * delta_z; + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * delta_z; - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * delta_z; + indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * delta_z; - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * delta_z; + indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * delta_z; - indx = indx_x + indx_y*nx_g 
+ (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * (1-delta_z); + indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * (1 - delta_z); - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * delta_z; - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); + indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * (1 - delta_z); - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); + indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z); - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); } } - - -#ifdef PARALLEL_OMP -//Compute the CIC density when PARALLEL_OMP -void Particles_3D::Get_Density_CIC_OMP( ){ - - - //Span OpenMP threads - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifdef PARALLEL_OMP +// Compute the CIC density when PARALLEL_OMP +void Particles3D::Get_Density_CIC_OMP() +{ + // Span OpenMP threads + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id; int g_start, g_end; int n_omp_procs; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); int nGHST = G.n_ghost_particles_grid; - int nx_g = G.nx_local + 2*nGHST; - int ny_g = G.ny_local + 2*nGHST; - int nz_g = G.nz_local + 2*nGHST; + int nx_g = G.nx_local + 2 * nGHST; 
+ int ny_g = G.ny_local + 2 * nGHST; + int nz_g = G.nz_local + 2 * nGHST; Real xMin, yMin, zMin, dx, dy, dz; - xMin = G.xMin; - yMin = G.yMin; - zMin = G.zMin; - dx = G.dx; - dy = G.dy; - dz = G.dz; - Real dV_inv = 1./(G.dx*G.dy*G.dz); - + xMin = G.xMin; + yMin = G.yMin; + zMin = G.zMin; + dx = G.dx; + dy = G.dy; + dz = G.dz; + Real dV_inv = 1. / (G.dx * G.dy * G.dz); - Get_OMP_Grid_Indxs( nz_g, n_omp_procs, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(nz_g, n_omp_procs, omp_id, &g_start, &g_end); part_int_t pIndx; int indx_x, indx_y, indx_z, indx; @@ -314,37 +306,37 @@ void Particles_3D::Get_Density_CIC_OMP( ){ bool ignore, in_local; bool add_1, add_2; - for ( pIndx=0; pIndx < n_local; pIndx++ ){ + for (pIndx = 0; pIndx < n_local; pIndx++) { add_1 = false; add_2 = false; - z_pos = pos_z[pIndx]; - indx_z = (int) floor( ( z_pos - zMin - 0.5*dz ) / dz ); + z_pos = pos_z[pIndx]; + indx_z = (int)floor((z_pos - zMin - 0.5 * dz) / dz); indx_z += nGHST; - if ( (indx_z >= g_start) && (indx_z < g_end) ) add_1 = true; - if ( ((indx_z+1) >= g_start) && ((indx_z+1) < g_end) ) add_2 = true; - if (!( add_1 || add_2) ) continue; + if ((indx_z >= g_start) && (indx_z < g_end)) add_1 = true; + if (((indx_z + 1) >= g_start) && ((indx_z + 1) < g_end)) add_2 = true; + if (!(add_1 || add_2)) continue; ignore = false; - x_pos = pos_x[pIndx]; - y_pos = pos_y[pIndx]; + x_pos = pos_x[pIndx]; + y_pos = pos_y[pIndx]; - indx_x = (int) floor( ( x_pos - xMin - 0.5*dx ) / dx ); - indx_y = (int) floor( ( y_pos - yMin - 0.5*dy ) / dy ); + indx_x = (int)floor((x_pos - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((y_pos - yMin - 0.5 * dy) / dy); indx_z -= nGHST; - if ( indx_x < -1 ) ignore = true; - if ( indx_y < -1 ) ignore = true; - if ( indx_z < -1 ) ignore = true; - if ( indx_x > nx_g-3 ) ignore = true; - if ( indx_y > ny_g-3 ) ignore = true; - if ( indx_y > nz_g-3 ) ignore = true; - if ( ignore ){ - #ifdef PARTICLE_IDS + if (indx_x < -1) ignore = true; + if (indx_y < -1) ignore = true; + if 
(indx_z < -1) ignore = true; + if (indx_x > nx_g - 3) ignore = true; + if (indx_y > ny_g - 3) ignore = true; + if (indx_y > nz_g - 3) ignore = true; + if (ignore) { + #ifdef PARTICLE_IDS std::cout << "ERROR CIC Index pID: " << partIDs[pIndx] << std::endl; - #else + #else std::cout << "ERROR CIC Index " << std::endl; - #endif + #endif std::cout << "Negative xIndx: " << x_pos << " " << indx_x << std::endl; std::cout << "Negative zIndx: " << z_pos << " " << indx_z << std::endl; std::cout << "Negative yIndx: " << y_pos << " " << indx_y << std::endl; @@ -356,23 +348,23 @@ void Particles_3D::Get_Density_CIC_OMP( ){ continue; } in_local = true; - if ( x_pos < G.xMin || x_pos >= G.xMax ) in_local = false; - if ( y_pos < G.yMin || y_pos >= G.yMax ) in_local = false; - if ( z_pos < G.zMin || z_pos >= G.zMax ) in_local = false; - if ( ! in_local ) { + if (x_pos < G.xMin || x_pos >= G.xMax) in_local = false; + if (y_pos < G.yMin || y_pos >= G.yMax) in_local = false; + if (z_pos < G.zMin || z_pos >= G.zMax) in_local = false; + if (!in_local) { std::cout << " Density CIC Error:" << std::endl; - #ifdef PARTICLE_IDS + #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << partIDs[pIndx] << std::endl; - #else + #else std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + #endif + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << x_pos << std::endl; std::cout << " Particle Y: " << y_pos << std::endl; std::cout << " Particle Z: " << z_pos << std::endl; - continue; + continue; } #ifdef SINGLE_PARTICLE_MASS @@ -381,49 +373,48 @@ void 
Particles_3D::Get_Density_CIC_OMP( ){ pMass = mass[pIndx] * dV_inv; #endif - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( x_pos - cell_center_x ) / dx; - delta_y = 1 - ( y_pos - cell_center_y ) / dy; - delta_z = 1 - ( z_pos - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (x_pos - cell_center_x) / dx; + delta_y = 1 - (y_pos - cell_center_y) / dy; + delta_z = 1 - (z_pos - cell_center_z) / dz; indx_x += nGHST; indx_y += nGHST; indx_z += nGHST; - if ( add_1 ){ - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * delta_z; + if (add_1) { + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * delta_z; - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * delta_z; + indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * delta_z; - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * delta_z; + indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * delta_z; - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * delta_z; } - if ( add_2 ){ - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * delta_y * (1-delta_z); + if (add_2) { + indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * delta_y * (1 - delta_z); - indx = (indx_x+1) + indx_y*nx_g + 
(indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); + indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * delta_y * (1 - delta_z); - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); + indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * delta_x * (1 - delta_y) * (1 - delta_z); - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - G.density[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; + G.density[indx] += pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); } } } } -#endif //PARALLEL_OMP - -#endif //PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #endif diff --git a/src/particles/density_CIC.h b/src/particles/density_CIC.h index 393c99a6a..b7181e68d 100644 --- a/src/particles/density_CIC.h +++ b/src/particles/density_CIC.h @@ -1,9 +1,10 @@ #ifdef PARTICLES -#ifndef DENSITY_CIC_H -#define DENSITY_CIC_H + #ifndef DENSITY_CIC_H + #define DENSITY_CIC_H -void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ); +void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, + int &indx_x, int &indx_y, int &indx_z); -#endif + #endif #endif \ No newline at end of file diff --git a/src/particles/density_CIC_gpu.cu b/src/particles/density_CIC_gpu.cu index 977f84421..756c48643 100644 --- a/src/particles/density_CIC_gpu.cu +++ b/src/particles/density_CIC_gpu.cu @@ -1,143 +1,161 @@ #ifdef PARTICLES -#include -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../particles/particles_3D.h" -#include "../grid/grid3D.h" - 
-#ifdef GRAVITY_GPU -void Grid3D::Copy_Particles_Density_to_GPU(){ - CudaSafeCall( cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells*sizeof(Real), cudaMemcpyHostToDevice) ); + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../particles/particles_3D.h" + #include "../utils/gpu.hpp" + + #ifdef GRAVITY_GPU +void Grid3D::Copy_Particles_Density_to_GPU() +{ + GPU_Error_Check(cudaMemcpy(Particles.G.density_dev, Particles.G.density, Particles.G.n_cells * sizeof(Real), + cudaMemcpyHostToDevice)); } -#endif + #endif -#ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU -//Define atomic_add if it's not supported -#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 -#else -__device__ double atomicAdd(double* address, double val) + // Define atomic_add if it's not supported + #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 + #else +__device__ double atomicAdd(double *address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); + unsigned long long int *address_as_ull = (unsigned long long int *)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); } -#endif + #endif -//Get the CIC index from the particle position ( device function ) -__device__ void Get_Indexes_CIC( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){ - indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - indx_y = 
(int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / dz ); +// Get the CIC index from the particle position ( device function ) +__device__ void Get_Indexes_CIC(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, + Real pos_z, int &indx_x, int &indx_y, int &indx_z) +{ + indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); + indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz); } -//CUDA Kernel to compute the CIC density from the particles positions -__global__ void Get_Density_CIC_Kernel( part_int_t n_local, Real particle_mass, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost ){ - - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +// CUDA Kernel to compute the CIC density from the particles positions +__global__ void Get_Density_CIC_Kernel(part_int_t n_local, Real particle_mass, Real *density_dev, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *mass_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, + int ny, int nz, int n_ghost) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) { + return; + } int nx_g, ny_g; - nx_g = nx + 2*n_ghost; - ny_g = ny + 2*n_ghost; + nx_g = nx + 2 * n_ghost; + ny_g = ny + 2 * n_ghost; Real pos_x, pos_y, pos_z, pMass; Real cell_center_x, cell_center_y, cell_center_z; Real delta_x, delta_y, delta_z; - Real dV_inv = 1./(dx*dy*dz); + Real dV_inv = 1. 
/ (dx * dy * dz); pos_x = pos_x_dev[tid]; pos_y = pos_y_dev[tid]; pos_z = pos_z_dev[tid]; - #ifdef SINGLE_PARTICLE_MASS + #ifdef SINGLE_PARTICLE_MASS pMass = particle_mass * dV_inv; - #else + #else pMass = mass_dev[tid] * dV_inv; - #endif + #endif int indx_x, indx_y, indx_z, indx; - Get_Indexes_CIC( xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z ); + Get_Indexes_CIC(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z); bool in_local = true; - if ( pos_x < xMin || pos_x >= xMax ) in_local = false; - if ( pos_y < yMin || pos_y >= yMax ) in_local = false; - if ( pos_z < zMin || pos_z >= zMax ) in_local = false; - if ( ! in_local ) { - printf(" Density CIC Error: Particle outside local domain [%f %f %f] [%f %f] [%f %f] [%f %f]\n ", pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); + if (pos_x < xMin || pos_x >= xMax) { + in_local = false; + } + if (pos_y < yMin || pos_y >= yMax) { + in_local = false; + } + if (pos_z < zMin || pos_z >= zMax) { + in_local = false; + } + if (!in_local) { + printf( + " Density CIC Error: Particle outside local domain [%f %f %f] [%f " + "%f] [%f %f] [%f %f]\n ", + pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); return; } - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (pos_x - cell_center_x) / dx; + delta_y = 1 - (pos_y - cell_center_y) / dy; + delta_z = 1 - (pos_z - cell_center_z) / dz; indx_x += n_ghost; indx_y += n_ghost; indx_z += n_ghost; - - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * delta_x 
* delta_y * delta_z; - atomicAdd( &density_dev[indx], pMass * delta_x * delta_y * delta_z); + atomicAdd(&density_dev[indx], pMass * delta_x * delta_y * delta_z); - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; + indx = (indx_x + 1) + indx_y * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * delta_y * delta_z; - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * delta_y * delta_z); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * delta_y * delta_z); - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + indx = indx_x + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * (1-delta_y) * delta_z; - atomicAdd( &density_dev[indx], pMass * delta_x * (1-delta_y) * delta_z); + atomicAdd(&density_dev[indx], pMass * delta_x * (1 - delta_y) * delta_z); // - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + indx = indx_x + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * delta_x * delta_y * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * delta_x * delta_y * (1-delta_z) ); + atomicAdd(&density_dev[indx], pMass * delta_x * delta_y * (1 - delta_z)); - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + indx_z * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * delta_z; - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * (1-delta_y) * delta_z); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * (1 - delta_y) * delta_z); - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + indx_y * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * delta_y * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * delta_y * (1-delta_z)); + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * delta_y * (1 - delta_z)); - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = indx_x + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; 
// density_dev[indx] += pMass * delta_x * (1-delta_y) * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * delta_x * (1-delta_y) * (1-delta_z)); + atomicAdd(&density_dev[indx], pMass * delta_x * (1 - delta_y) * (1 - delta_z)); - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; // density_dev[indx] += pMass * (1-delta_x) * (1-delta_y) * (1-delta_z); - atomicAdd( &density_dev[indx], pMass * (1-delta_x) * (1-delta_y) * (1-delta_z)); - + atomicAdd(&density_dev[indx], pMass * (1 - delta_x) * (1 - delta_y) * (1 - delta_z)); } - - -//Clear the density array: density=0 -void Particles_3D::Clear_Density_GPU_function( Real *density_dev, int n_cells){ - Set_Particles_Array_Real( 0.0, density_dev, n_cells); +// Clear the density array: density=0 +void Particles3D::Clear_Density_GPU_function(Real *density_dev, int n_cells) +{ + Set_Particles_Array_Real(0.0, density_dev, n_cells); } - -//Call the CIC density kernel to get the particles density -void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev , Real *pos_z_dev, Real *mass_dev){ - +// Call the CIC density kernel to get the particles density +void Particles3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, + Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, + int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, + Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *mass_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of 
blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -145,16 +163,18 @@ void Particles_3D::Get_Density_CIC_GPU_function(part_int_t n_local, Real particl // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, particle_mass, density_dev, pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid ); - CudaCheckError(); + hipLaunchKernelGGL(Get_Density_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, particle_mass, density_dev, + pos_x_dev, pos_y_dev, pos_z_dev, mass_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, + nx_local, ny_local, nz_local, n_ghost_particles_grid); + GPU_Error_Check(); cudaDeviceSynchronize(); } - #if !defined(GRAVITY_GPU) - //Copy the density from device to host - CudaSafeCall( cudaMemcpy(density_h, density_dev, n_cells*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif + #if !defined(GRAVITY_GPU) + // Copy the density from device to host + GPU_Error_Check(cudaMemcpy(density_h, density_dev, n_cells * sizeof(Real), cudaMemcpyDeviceToHost)); + #endif } -#endif//PARTICLES_GPU -#endif//PARTICLES + #endif // PARTICLES_GPU +#endif // PARTICLES diff --git a/src/particles/density_boundaries.cpp b/src/particles/density_boundaries.cpp index 3e5d56d7b..6884e99cd 100644 --- a/src/particles/density_boundaries.cpp +++ b/src/particles/density_boundaries.cpp @@ -1,128 +1,134 @@ #ifdef PARTICLES -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../particles/particles_3D.h" -#include + #include -//Copy the particles density boundaries for non-MPI PERIODIC transfers -void Grid3D::Set_Particles_Density_Boundaries_Periodic( int direction, int side ){ + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" +// Copy the particles density boundaries for non-MPI PERIODIC transfers +void 
Grid3D::Set_Particles_Density_Boundaries_Periodic(int direction, int side) +{ int i, j, k, indx_src, indx_dst; int nGHST, nx_g, ny_g, nz_g; nGHST = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*nGHST; - ny_g = Particles.G.ny_local + 2*nGHST; - nz_g = Particles.G.nz_local + 2*nGHST; - - //Copy X boundaries - if (direction == 0){ - for ( k=0; k + #include + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" - -__global__ void Set_Particles_Density_Boundaries_Periodic_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d ){ - +__global__ void Set_Particles_Density_Boundaries_Periodic_kernel(int direction, int side, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost, Real *density_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_src, tid_dst; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; - - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; - - if ( direction == 0 ){ - if ( side == 0 ) tid_src = ( nx - n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = ( n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = ( tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - } - if ( direction == 1 ){ - if ( side == 0 ) tid_src = (tid_i) + ( ny - n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + ( n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + ( tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx + (tid_j)*nx*ny; - } - if ( direction == 2 ){ - if ( side == 0 ) tid_src = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k )*nx*ny; - if ( side == 0 ) tid_dst = (tid_i) + (tid_j)*nx + ( 
n_ghost + tid_k )*nx*ny; - if ( side == 1 ) tid_src = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_dst = (tid_i) + (tid_j)*nx + ( nz - 2* n_ghost + tid_k )*nx*ny; - } - + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * n_i; + + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } + + if (direction == 0) { + if (side == 0) { + tid_src = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + } + if (direction == 1) { + if (side == 0) { + tid_src = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + } + if (direction == 2) { + if (side == 0) { + tid_src = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + } + if (side == 0) { + tid_dst = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + } + if (side == 1) { + tid_src = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_dst = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } + } + density_d[tid_dst] += density_d[tid_src]; - } - -void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int side ){ - +void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU(int direction, int side) +{ int n_ghost, nx_g, ny_g, nz_g, size, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*n_ghost; - ny_g = Particles.G.ny_local 
+ 2*n_ghost; - nz_g = Particles.G.nz_local + 2*n_ghost; + nx_g = Particles.G.nx_local + 2 * n_ghost; + ny_g = Particles.G.ny_local + 2 * n_ghost; + nz_g = Particles.G.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -66,74 +90,79 @@ void Grid3D::Set_Particles_Density_Boundaries_Periodic_GPU( int direction, int s size = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size - 1 ) / TPB_PARTICLES + 1; + ngrid = (size - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Set_Particles_Density_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev ); - + hipLaunchKernelGGL(Set_Particles_Density_Boundaries_Periodic_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + n_i, n_j, nx_g, ny_g, nz_g, n_ghost, Particles.G.density_dev); } + #ifdef MPI_CHOLLA - - - -#ifdef MPI_CHOLLA - - - -__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d, Real *transfer_buffer_d ){ - +__global__ void Load_Particles_Density_Boundary_to_Buffer_kernel(int direction, int side, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost, Real *density_d, + Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = tid - tid_k * n_i * n_j - tid_j * 
n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_dens = ( tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = ( nx - n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if (side == 0) { + tid_dens = (tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (nx - n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } - if ( direction == 1 ){ - if ( side == 0 ) tid_dens = (tid_i) + ( tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + ( ny - n_ghost + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) { + tid_dens = (tid_i) + (tid_k)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (ny - n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } - if ( direction == 2 ){ - if ( side == 0 ) tid_dens = (tid_i) + (tid_j)*nx + ( tid_k )*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + (tid_j)*nx + ( nz - n_ghost + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) { + tid_dens = (tid_i) + (tid_j)*nx + (tid_k)*nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (tid_j)*nx + (nz - n_ghost + tid_k) * nx * ny; + } } transfer_buffer_d[tid_buffer] = density_d[tid_dens]; - } - - - - -int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int side, Real *buffer ){ - +int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU(int direction, int side, Real *buffer) +{ int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*n_ghost; - ny_g = Particles.G.ny_local + 2*n_ghost; - nz_g = Particles.G.nz_local + 2*n_ghost; + nx_g = Particles.G.nx_local + 2 * n_ghost; 
+ ny_g = Particles.G.ny_local + 2 * n_ghost; + nz_g = Particles.G.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -141,7 +170,7 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int si size_buffer = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_PARTICLES + 1; + ngrid = (size_buffer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -153,65 +182,75 @@ int Grid3D::Load_Particles_Density_Boundary_to_Buffer_GPU( int direction, int si Real *send_buffer_d; send_buffer_d = buffer; - hipLaunchKernelGGL( Load_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d ); + hipLaunchKernelGGL(Load_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, send_buffer_d); cudaDeviceSynchronize(); return size_buffer; } - - - -__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel( int direction, int side, int n_i, int n_j, int nx, int ny, int nz, int n_ghost, Real *density_d, Real *transfer_buffer_d ){ - +__global__ void Unload_Particles_Density_Boundary_to_Buffer_kernel(int direction, int side, int n_i, int n_j, int nx, + int ny, int nz, int n_ghost, Real *density_d, + Real *transfer_buffer_d) +{ // get a global thread ID int tid, tid_i, tid_j, tid_k, tid_buffer, tid_dens; - tid = threadIdx.x + blockIdx.x * blockDim.x; - tid_k = tid / (n_i*n_j); - tid_j = (tid - tid_k*n_i*n_j) / n_i; - tid_i = tid - tid_k*n_i*n_j - tid_j*n_i; + tid = threadIdx.x + blockIdx.x * blockDim.x; + tid_k = tid / (n_i * n_j); + tid_j = (tid - tid_k * n_i * n_j) / n_i; + tid_i = 
tid - tid_k * n_i * n_j - tid_j * n_i; - if ( tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost ) return; + if (tid_i < 0 || tid_i >= n_i || tid_j < 0 || tid_j >= n_j || tid_k < 0 || tid_k >= n_ghost) { + return; + } - tid_buffer = tid_i + tid_j*n_i + tid_k*n_i*n_j; + tid_buffer = tid_i + tid_j * n_i + tid_k * n_i * n_j; - if ( direction == 0 ){ - if ( side == 0 ) tid_dens = ( n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = ( nx - 2*n_ghost + tid_k ) + (tid_i)*nx + (tid_j)*nx*ny; + if (direction == 0) { + if (side == 0) { + tid_dens = (n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (nx - 2 * n_ghost + tid_k) + (tid_i)*nx + (tid_j)*nx * ny; + } } - if ( direction == 1 ){ - if ( side == 0 ) tid_dens = (tid_i) + ( n_ghost + tid_k )*nx + (tid_j)*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + ( ny - 2*n_ghost + tid_k )*nx + (tid_j)*nx*ny; + if (direction == 1) { + if (side == 0) { + tid_dens = (tid_i) + (n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (ny - 2 * n_ghost + tid_k) * nx + (tid_j)*nx * ny; + } } - if ( direction == 2 ){ - if ( side == 0 ) tid_dens = (tid_i) + (tid_j)*nx + ( n_ghost + tid_k )*nx*ny; - if ( side == 1 ) tid_dens = (tid_i) + (tid_j)*nx + ( nz - 2* n_ghost + tid_k )*nx*ny; + if (direction == 2) { + if (side == 0) { + tid_dens = (tid_i) + (tid_j)*nx + (n_ghost + tid_k) * nx * ny; + } + if (side == 1) { + tid_dens = (tid_i) + (tid_j)*nx + (nz - 2 * n_ghost + tid_k) * nx * ny; + } } density_d[tid_dens] += transfer_buffer_d[tid_buffer]; - } - - - -void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, int side, Real *buffer ){ - +void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU(int direction, int side, Real *buffer) +{ int n_ghost, nx_g, ny_g, nz_g, size_buffer, ngrid, n_i, n_j; n_ghost = Particles.G.n_ghost_particles_grid; - nx_g = Particles.G.nx_local + 2*n_ghost; - 
ny_g = Particles.G.ny_local + 2*n_ghost; - nz_g = Particles.G.nz_local + 2*n_ghost; + nx_g = Particles.G.nx_local + 2 * n_ghost; + ny_g = Particles.G.ny_local + 2 * n_ghost; + nz_g = Particles.G.nz_local + 2 * n_ghost; - if ( direction == 0 ){ + if (direction == 0) { n_i = ny_g; n_j = nz_g; } - if ( direction == 1 ){ + if (direction == 1) { n_i = nx_g; n_j = nz_g; } - if ( direction == 2 ){ + if (direction == 2) { n_i = nx_g; n_j = ny_g; } @@ -219,7 +258,7 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i size_buffer = n_ghost * n_i * n_j; // set values for GPU kernels - ngrid = ( size_buffer - 1 ) / TPB_PARTICLES + 1; + ngrid = (size_buffer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -231,12 +270,10 @@ void Grid3D::Unload_Particles_Density_Boundary_From_Buffer_GPU( int direction, i Real *recv_buffer_d; recv_buffer_d = buffer; - hipLaunchKernelGGL( Unload_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d ); - + hipLaunchKernelGGL(Unload_Particles_Density_Boundary_to_Buffer_kernel, dim1dGrid, dim1dBlock, 0, 0, direction, side, + n_i, n_j, nx_g, ny_g, nz_g, n_ghost, density_d, recv_buffer_d); } + #endif // MPI_CHOLLA - -#endif//MPI_CHOLLA - -#endif//PARTICLES_GPU +#endif // PARTICLES_GPU & GRAVITY_GPU diff --git a/src/particles/feedback_CIC.cpp b/src/particles/feedback_CIC.cpp deleted file mode 100644 index 64c2940f7..000000000 --- a/src/particles/feedback_CIC.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#ifdef PARTICLES -#ifdef DE -#ifdef PARTICLE_AGE - -#include -#include "../particles/feedback_CIC.h" -#include "../particles/particles_3D.h" -#include "../grid/grid3D.h" -#include "../particles/density_CIC.h" - - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif - - -// simple energy feedback prescription -Real getClusterEnergyFeedback(Real t, 
Real dt, Real age) { - if (t + age <= 1.0e4) return ENERGY_FEEDBACK_RATE * dt; - else return 0; -} - -// simple feedback prescription -Real getClusterMassFeedback(Real t, Real dt, Real age) { - //if (t + age <= 1.0e4) return 0.1 * dt; // 0.01 SN/ky/cluster * 10 solar mass ejected/SN - //if (t + age <= 1.0e4) return 10 * dt; // 1 SN/ky/cluster * 10 solar mass ejected/SN - //else return 0; - return 0; -} - - -void Grid3D::Cluster_Feedback(){ - #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - Cluster_Feedback_Function( 0, Particles.n_local ); - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) - { - int omp_id, n_omp_procs; - part_int_t p_start, p_end; - - omp_id = omp_get_thread_num(); - n_omp_procs = omp_get_num_threads(); - - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - - Cluster_Feedback_Function( p_start, p_end ); - } - #endif //PARALLEL_OMP - #endif //PARTICLES_CPU -} - - -//Compute the CIC feedback -void Grid3D::Cluster_Feedback_Function(part_int_t p_start, part_int_t p_end) { - int nx_g, ny_g, nz_g; - nx_g = H.nx; - ny_g = H.ny; - nz_g = H.nz; - - Real xMin, yMin, zMin; - xMin = H.xblocal; //TODO: make sure this is correct (and not H.xbound) (local min vs. 
global min) - yMin = H.yblocal; - zMin = H.zblocal; - - - part_int_t pIndx; - int indx_x, indx_y, indx_z, indx; - Real x_pos, y_pos, z_pos; - Real cell_center_x, cell_center_y, cell_center_z; - Real delta_x, delta_y, delta_z; - Real dV_inv = 1./(H.dx*H.dy*H.dz); - Real feedback_energy, feedback_density; - - bool ignore, in_local; - for ( pIndx=p_start; pIndx < p_end; pIndx++ ){ - ignore = false; - in_local = true; - // pMass = Particles.mass[pIndx] * dV_inv; - x_pos = Particles.pos_x[pIndx]; - y_pos = Particles.pos_y[pIndx]; - z_pos = Particles.pos_z[pIndx]; - Get_Indexes_CIC( xMin, yMin, zMin, H.dx, H.dy, H.dz, x_pos, y_pos, z_pos, indx_x, indx_y, indx_z ); - if ( indx_x < -1 ) ignore = true; - if ( indx_y < -1 ) ignore = true; - if ( indx_z < -1 ) ignore = true; - if ( indx_x > nx_g-3 ) ignore = true; - if ( indx_y > ny_g-3 ) ignore = true; - if ( indx_y > nz_g-3 ) ignore = true; - if ( x_pos < H.xblocal || x_pos >= H.xblocal_max ) in_local = false; - if ( y_pos < H.yblocal || y_pos >= H.yblocal_max ) in_local = false; - if ( z_pos < H.zblocal || z_pos >= H.zblocal_max ) in_local = false; - if ( ! 
in_local ) { - std::cout << " Cluster_FeedbackError:" << std::endl; - #ifdef PARTICLE_IDS - std::cout << " Particle outside Local domain pID: " << Particles.partIDs[pIndx] << std::endl; - #else - std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << xMin << " " << H.xblocal_max << std::endl; - std::cout << " Domain Y: " << yMin << " " << H.xblocal_max << std::endl; - std::cout << " Domain Z: " << zMin << " " << H.xblocal_max << std::endl; - std::cout << " Particle X: " << x_pos << std::endl; - std::cout << " Particle Y: " << y_pos << std::endl; - std::cout << " Particle Z: " << z_pos << std::endl; - continue; - } - if ( ignore ){ - #ifdef PARTICLE_IDS - std::cout << "ERROR Cluster_Feedback Index pID: " << Particles.partIDs[pIndx] << std::endl; - #else - std::cout << "ERROR Cluster_Feedback Index " << std::endl; - #endif - std::cout << "Negative xIndx: " << x_pos << " " << indx_x << std::endl; - std::cout << "Negative zIndx: " << z_pos << " " << indx_z << std::endl; - std::cout << "Negative yIndx: " << y_pos << " " << indx_y << std::endl; - std::cout << "Excess xIndx: " << x_pos << " " << indx_x << std::endl; - std::cout << "Excess yIndx: " << y_pos << " " << indx_y << std::endl; - std::cout << "Excess zIndx: " << z_pos << " " << indx_z << std::endl; - std::cout << std::endl; - continue; - } - - cell_center_x = xMin + indx_x*H.dx + 0.5*H.dx; - cell_center_y = yMin + indx_y*H.dy + 0.5*H.dy; - cell_center_z = zMin + indx_z*H.dz + 0.5*H.dz; - delta_x = 1 - ( x_pos - cell_center_x ) / H.dx; - delta_y = 1 - ( y_pos - cell_center_y ) / H.dy; - delta_z = 1 - ( z_pos - cell_center_z ) / H.dz; - indx_x += H.n_ghost; - indx_y += H.n_ghost; - indx_z += H.n_ghost; - - feedback_energy = getClusterEnergyFeedback(H.t, H.dt, Particles.age[pIndx]) * dV_inv; - feedback_density = getClusterMassFeedback(H.t, H.dt, Particles.age[pIndx]) * dV_inv; - - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * 
delta_x * delta_y * delta_z; - C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * delta_z; - - indx = (indx_x+1) + indx_y*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * delta_y * delta_z; - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * delta_z; - - indx = indx_x + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * (1-delta_y) * delta_z; - C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * delta_z; - - indx = indx_x + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * delta_y * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * delta_x * delta_y * (1-delta_z); - - indx = (indx_x+1) + (indx_y+1)*nx_g + indx_z*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * delta_z; - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * delta_z; - - indx = (indx_x+1) + indx_y*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * delta_y * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * delta_y * (1-delta_z); - - indx = indx_x + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * delta_x * (1-delta_y) * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * delta_x * (1-delta_y) * (1-delta_z); - - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; - C.density[indx] += feedback_density * (1-delta_x) * (1-delta_y) * (1-delta_z); - C.GasEnergy[indx] += feedback_energy * (1-delta_x) * (1-delta_y) * (1-delta_z); - } -} - -#endif //PARTICLE_AGE -#endif //DE -#endif //PARTICLES diff --git a/src/particles/feedback_CIC.h b/src/particles/feedback_CIC.h deleted file mode 100644 index 1775cb898..000000000 --- a/src/particles/feedback_CIC.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifdef PARTICLES -#ifdef DE -#ifdef PARTICLE_AGE - - -#ifndef FEEDBACK_CIC_H -#define FEEDBACK_CIC_H -#include "../global/global.h" - -#define 
ENERGY_FEEDBACK_RATE 5.25958e-07 //Rate is 1e51 erg/100M_solar spread out over 10Myr - -Real getClusterEnergyFeedback(Real t, Real dt, Real age); -Real getClusterMassFeedback(Real t, Real dt, Real age); - -#endif -#endif -#endif -#endif diff --git a/src/particles/feedback_CIC_gpu.cu b/src/particles/feedback_CIC_gpu.cu new file mode 100644 index 000000000..bd162e585 --- /dev/null +++ b/src/particles/feedback_CIC_gpu.cu @@ -0,0 +1,785 @@ +#if defined(SUPERNOVA) && defined(PARTICLES_GPU) && defined(PARTICLE_AGE) && defined(PARTICLE_IDS) + + #include + #include + #include + #include + + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "supernova.h" + + #define TPB_FEEDBACK 128 + #define FEED_INFO_N 6 + #define I_RES 1 // unused + #define I_UNRES 2 // unused + #define I_ENERGY 3 // unused + #define I_MOMENTUM 4 // unused + #define I_UNRES_ENERGY 5 // used + +typedef curandStateMRG32k3a_t FeedbackPrng; +// typedef curandStatePhilox4_32_10_t FeedbackPrng; + +namespace supernova +{ +FeedbackPrng* randStates; +part_int_t n_states; +Real *dev_snr, snr_dt, time_sn_start, time_sn_end; +int snr_n; +} // namespace supernova + + #ifndef O_HIP +// NOLINTNEXTLINE(readability-identifier-naming) +__device__ double atomicMax(double* address, double val) +{ + auto* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(fmax(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); +} + #endif // O_HIP + +__global__ void Init_State_Kernel(unsigned int seed, FeedbackPrng* states) +{ + int id = blockIdx.x * blockDim.x + threadIdx.x; + curand_init(seed, id, 0, &states[id]); +} + +/** + * @brief Does 2 things: + * -# Read in SN rate data from Starburst 99. 
If no file exists, assume a + * constant rate. + * -# Initialize the cuRAND state, which is analogous to the concept of + * generators in CPU code. The state object maintains configuration and status + * the cuRAND context for each thread on the GPU. Initialize more than the + * number of local particles since the latter will change through MPI transfers. + * + * @param P pointer to parameters struct. Passes in starburst 99 filename and + * random number gen seed. + * @param n_local number of local particles on the GPU + * @param allocation_factor + */ +void supernova::initState(struct Parameters* P, part_int_t n_local, Real allocation_factor) +{ + chprintf("supernova::initState start\n"); + std::string snr_filename(P->snr_filename); + if (not snr_filename.empty()) { + chprintf("Specified a SNR filename %s.\n", snr_filename.data()); + + // read in array of supernova rate values. + std::ifstream snr_in(snr_filename); + if (!snr_in.is_open()) { + chprintf("ERROR: but couldn't read SNR file.\n"); + exit(-1); + } + + std::vector snr_time; + std::vector snr; + + const int N_HEADER = 7; // S'99 has 7 rows of header information + const char* s99_delim = " "; // S'99 data separator + std::string line; + int line_counter = 0; + + while (snr_in.good()) { + std::getline(snr_in, line); + if (line_counter++ < N_HEADER) { + continue; + } // skip header processing + + int i = 0; + char* data = strtok(line.data(), s99_delim); + while (data != nullptr) { + if (i == 0) { + // in the following divide by # years per kyr (1000) + snr_time.push_back(std::stof(std::string(data)) / 1000); + } else if (i == 1) { + snr.push_back(pow(10, std::stof(std::string(data))) / 1000); + } + if (i > 0) { + break; // only care about the first 2 items. Once i = 1 can break + } // here. + + data = strtok(nullptr, s99_delim); + i++; + } + } + + time_sn_end = snr_time[snr_time.size() - 1]; + time_sn_start = snr_time[0]; + // the following is the time interval between data points + // (i.e. 
assumes regular temporal spacing) + snr_dt = (time_sn_end - time_sn_start) / (snr.size() - 1); + + GPU_Error_Check(cudaMalloc((void**)&dev_snr, snr.size() * sizeof(Real))); + GPU_Error_Check(cudaMemcpy(dev_snr, snr.data(), snr.size() * sizeof(Real), cudaMemcpyHostToDevice)); + + } else { + chprintf("No SN rate file specified. Using constant rate\n"); + time_sn_start = DEFAULT_SN_START; + time_sn_end = DEFAULT_SN_END; + } + + // Now initialize the poisson random number generator state. + n_states = n_local * allocation_factor; + GPU_Error_Check(cudaMalloc((void**)&randStates, n_states * sizeof(FeedbackPrng))); + + int ngrid = (n_states - 1) / TPB_FEEDBACK + 1; + dim3 grid(ngrid); + dim3 block(TPB_FEEDBACK); + + hipLaunchKernelGGL(Init_State_Kernel, grid, block, 0, 0, P->prng_seed, randStates); + GPU_Error_Check(cudaDeviceSynchronize()); + chprintf("supernova::initState end: n_states=%ld, ngrid=%d, threads=%d\n", n_states, ngrid, TPB_FEEDBACK); +} + +__device__ Real GetSNRate(Real t, Real* dev_snr, Real snr_dt, Real t_start, Real t_end) +{ + if (t < t_start || t >= t_end) { + return 0; + } + if (dev_snr == nullptr) { + return supernova::DEFAULT_SNR; + } + + int index = (int)((t - t_start) / snr_dt); + return dev_snr[index] + (t - index * snr_dt) * (dev_snr[index + 1] - dev_snr[index]) / snr_dt; +} + +__device__ Real Calc_Timestep(Real gamma, Real* density, Real* momentum_x, Real* momentum_y, Real* momentum_z, + Real* energy, int index, Real dx, Real dy, Real dz, Real density_floor) +{ + Real dens = fmax(density[index], density_floor); + Real d_inv = 1.0 / dens; + Real vx = momentum_x[index] * d_inv; + Real vy = momentum_y[index] * d_inv; + Real vz = momentum_z[index] * d_inv; + Real P = fmax((energy[index] - 0.5 * dens * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0), TINY_NUMBER); + Real cs = sqrt(gamma * P * d_inv); + return fmax(fmax((fabs(vx) + cs) / dx, (fabs(vy) + cs) / dy), (fabs(vz) + cs) / dz); +} + +/** the prescription for dividing a scalar quantity 
between 3x3x3 cells is done + by imagining a 2x2x2 cell volume around the SN. These fractions, then, + represent the linear extent of this volume into the cell in question. For i=0 + this should be 1*1/2. For i=-1 this should be (1-dx)*1/2. For i=+1 this + should be dx*1/2. In the above the 1/2 factor is normalize over 2 + cells/direction. + */ +__device__ Real Frac(int i, Real dx) { return (-0.5 * i * i - 0.5 * i + 1 + i * dx) * 0.5; } + +__device__ Real D_Fr(int i, Real dx) +{ + return (dx > 0.5) * i * (1 - 2 * dx) + ((i + 1) * dx + 0.5 * (i - 1)) - 3 * (i - 1) * (i + 1) * (0.5 - dx); +} + +__device__ Real GetAverageDensity(Real* density, int xi, int yi, int zi, int nx_grid, int ny_grid, int n_ghost) +{ + Real d_average = 0.0; + for (int i = -1; i < 2; i++) { + for (int j = -1; j < 2; j++) { + for (int k = -1; k < 2; k++) { + d_average += + density[(xi + n_ghost + i) + (yi + n_ghost + j) * nx_grid + (zi + n_ghost + k) * nx_grid * ny_grid]; + } + } + } + return d_average / 27; +} + +__device__ Real GetAverageNumberDensity_CGS(Real* density, int xi, int yi, int zi, int nx_grid, int ny_grid, + int n_ghost) +{ + return GetAverageDensity(density, xi, yi, zi, nx_grid, ny_grid, n_ghost) * DENSITY_UNIT / (supernova::MU * MP); +} + +__device__ bool Particle_Is_Alone(Real* pos_x_dev, Real* pos_y_dev, Real* pos_z_dev, part_int_t n_local, int gtid, + Real dx) +{ + Real x0 = pos_x_dev[gtid]; + Real y0 = pos_y_dev[gtid]; + Real z0 = pos_z_dev[gtid]; + // Brute force loop to see if particle is alone + for (int i = 0; i < n_local; i++) { + if (i == gtid) { + continue; + } + if (abs(x0 - pos_x_dev[i]) > dx) { + continue; + } + if (abs(y0 - pos_y_dev[i]) > dx) { + continue; + } + if (abs(z0 - pos_z_dev[i]) > dx) { + continue; + } + // If we made it here, something is too close. 
+ return false; + } + return true; +} + +__global__ void Cluster_Feedback_Kernel(part_int_t n_local, part_int_t* id, Real* pos_x_dev, Real* pos_y_dev, + Real* pos_z_dev, Real* mass_dev, Real* age_dev, Real xMin, Real yMin, Real zMin, + Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx_g, int ny_g, + int nz_g, int n_ghost, Real t, Real dt, Real* dti, Real* info, Real* density, + Real* gasEnergy, Real* energy, Real* momentum_x, Real* momentum_y, + Real* momentum_z, Real gamma, FeedbackPrng* states, Real* prev_dens, + int* prev_N, short direction, Real* dev_snr, Real snr_dt, Real time_sn_start, + Real time_sn_end, int n_step, Real density_floor) +{ + __shared__ Real s_info[FEED_INFO_N * TPB_FEEDBACK]; // for collecting SN feedback information, like # + // of SNe or # resolved. + int tid = threadIdx.x; + int gtid = blockIdx.x * blockDim.x + tid; + + s_info[FEED_INFO_N * tid] = 0; // number of supernovae + s_info[FEED_INFO_N * tid + 1] = 0; // number of resolved events + s_info[FEED_INFO_N * tid + 2] = 0; // number of unresolved events + s_info[FEED_INFO_N * tid + 3] = 0; // resolved energy + s_info[FEED_INFO_N * tid + 4] = 0; // unresolved momentum + s_info[FEED_INFO_N * tid + 5] = 0; // unresolved KE added via momentum injection + + if (gtid < n_local) { + Real pos_x, pos_y, pos_z; + Real cell_center_x, cell_center_y, cell_center_z; + Real delta_x, delta_y, delta_z; + Real x_frac, y_frac, z_frac; + Real px, py, pz, d; + // Real t_b, t_a, v_1, v_2, d_b, d_a, p_b, p_a, e; + Real feedback_energy = 0, feedback_density = 0, feedback_momentum = 0, n_0, shell_radius; + bool is_resolved = false; + Real dV = dx * dy * dz; + Real local_dti = 0.0; + + pos_x = pos_x_dev[gtid]; + pos_y = pos_y_dev[gtid]; + pos_z = pos_z_dev[gtid]; + // kernel_printf("(%d): pos:(%.4e, %.4e, %.4e)\n", gtid, pos_x, pos_y, + // pos_z); kernel_printf("(%d): MIN:(%.4e, %.4e, %.4e)\n", gtid, xMin, yMin, + // xMin); + + bool in_local = + (pos_x >= xMin && pos_x < xMax) && (pos_y >= yMin 
&& pos_y < yMax) && (pos_z >= zMin && pos_z < zMax); + if (!in_local) { + kernel_printf( + " Feedback GPU: Particle outside local domain [%f %f %f] [%f %f] " + "[%f %f] [%f %f]\n ", + pos_x, pos_y, pos_z, xMin, xMax, yMin, yMax, zMin, zMax); + } + + int indx_x = (int)floor((pos_x - xMin) / dx); + int indx_y = (int)floor((pos_y - yMin) / dy); + int indx_z = (int)floor((pos_z - zMin) / dz); + // kernel_printf("(%d): indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, indx_z); + + bool ignore = indx_x < 0 || indx_y < 0 || indx_z < 0 || indx_x >= nx_g - 2 * n_ghost || + indx_y >= ny_g - 2 * n_ghost || indx_z >= nz_g - 2 * n_ghost; + if (ignore) { + kernel_printf( + " Feedback GPU: Particle CIC index err [%f %f %f] [%d %d %d] [%d " + "%d %d] \n ", + pos_x, pos_y, pos_z, indx_x, indx_y, indx_z, nx_g, ny_g, nz_g); + } + + // Avoid overlap issues for now + // bool is_alone = Particle_Is_Alone(pos_x_dev, pos_y_dev, pos_z_dev, + // n_local, + // gtid, 6 * dx); + + if (!ignore && in_local) { + int N = 0; + // only calculate this if there will be SN feedback + if ((t - age_dev[gtid]) <= time_sn_end) { + if (direction == -1) { + N = -prev_N[gtid]; + } else { + Real average_num_sn = + GetSNRate(t - age_dev[gtid], dev_snr, snr_dt, time_sn_start, time_sn_end) * mass_dev[gtid] * dt; + + // N = (int) (average_num_sn + 0.5); + + FeedbackPrng state; // = states[0]; // load initial state + + curand_init(42, 0, 0, &state); + unsigned long long skip = n_step * 10000 + id[gtid]; + skipahead(skip, &state); // provided by curand + // unsigned int debug_state = curand(&state); + + N = (int)curand_poisson(&state, average_num_sn); + + // states[gtid] = state; // don't write back to state, keep it + // pristine + prev_N[gtid] = N; + } + if (N != 0) { + mass_dev[gtid] -= N * supernova::MASS_PER_SN; + feedback_energy = N * supernova::ENERGY_PER_SN / dV; + feedback_density = N * supernova::MASS_PER_SN / dV; + if (direction == -1) { + n_0 = prev_dens[gtid]; + } else { + n_0 = 
GetAverageNumberDensity_CGS(density, indx_x, indx_y, indx_z, nx_g, ny_g, n_ghost); + prev_dens[gtid] = n_0; + } + // int devcount; + // cudaGetDeviceCount(&devcount); + // int devId; + // cudaGetDevice(&devId); + // kernel_printf("[%d: %d] N: %d, time: %.4e, dt: %.4e, e: %.4e, n_0: + // %.4e\n", devId, gtid, N, t, dt, feedback_energy, n_0); + + feedback_momentum = direction * supernova::FINAL_MOMENTUM * pow(n_0, -0.17) * pow(fabsf(N), 0.93) / dV; + shell_radius = supernova::R_SH * pow(n_0, -0.46) * pow(fabsf(N), 0.29); + is_resolved = 3 * max(dx, max(dy, dz)) <= shell_radius; + if (!is_resolved) { + kernel_printf( + "UR[%f] at (%d, %d, %d) id=%d, N=%d, shell_rad=%0.4e, " + "n_0=%0.4e\n", + t, indx_x + n_ghost, indx_y + n_ghost, indx_z + n_ghost, (int)id[gtid], N, shell_radius, n_0); + } + + s_info[FEED_INFO_N * tid] = 1. * N; + if (is_resolved) { + s_info[FEED_INFO_N * tid + 1] = direction * 1.0; + } else { + s_info[FEED_INFO_N * tid + 2] = direction * 1.0; + } + + int indx; + + if (is_resolved) { // if resolved inject energy and density + s_info[FEED_INFO_N * tid + 3] = feedback_energy * dV; + + indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); + indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz); + + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + + delta_x = 1 - (pos_x - cell_center_x) / dx; + delta_y = 1 - (pos_y - cell_center_y) / dy; + delta_z = 1 - (pos_z - cell_center_z) / dz; + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + for (int k = 0; k < 2; k++) { + indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g; + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Rb: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + 
momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Rb: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Rb: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + + // i_frac are the fractions of energy/density to be allocated + // to each of the 8 cells. + x_frac = i * (1 - delta_x) + (1 - i) * delta_x; + y_frac = j * (1 - delta_y) + (1 - j) * delta_y; + z_frac = k * (1 - delta_z) + (1 - k) * delta_z; + + atomicAdd(&density[indx], x_frac * y_frac * z_frac * feedback_density); + atomicAdd(&gasEnergy[indx], x_frac * y_frac * z_frac * feedback_energy); + atomicAdd(&energy[indx], x_frac * y_frac * z_frac * feedback_energy); + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ra: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ra: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ra: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * 
VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + + if (direction > 0) { + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, + energy, indx, dx, dy, dz, density_floor)); + } + } + } + } + } else { // if not resolved, inject momentum and density + s_info[FEED_INFO_N * tid + 4] = feedback_momentum * dV; + + delta_x = (pos_x - xMin - indx_x * dx) / dx; + delta_y = (pos_y - yMin - indx_y * dy) / dy; + delta_z = (pos_z - zMin - indx_z * dz) / dz; + // kernel_printf("(%d):indx:(%d, %d, %d)\n", gtid, indx_x, indx_y, + // indx_z); kernel_printf("(%d): pos:(%.4e, %.4e, %.4e), delta_x + // (%.2e, %.2e, + // %.2e)\n", gtid, pos_x, pos_y, pos_z, delta_x, delta_y, delta_z); + + indx_x += n_ghost; + indx_y += n_ghost; + indx_z += n_ghost; + + if (abs(feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5) > + 40000) { // injected speeds are greater than 4e4 km/s + kernel_printf("**** (%d, %d, %d) injected speeds are %.3e km/s\n", indx_x, indx_y, indx_z, + feedback_momentum / feedback_density * VELOCITY_UNIT * 1e-5); + } + feedback_momentum /= sqrt(3.0); + + for (int i = -1; i < 2; i++) { + for (int j = -1; j < 2; j++) { + for (int k = -1; k < 2; k++) { + // index in array of conserved quantities + indx = (indx_x + i) + (indx_y + j) * nx_g + (indx_z + k) * nx_g * ny_g; + + x_frac = D_Fr(i, delta_x) * Frac(j, delta_y) * Frac(k, delta_z); + y_frac = Frac(i, delta_x) * D_Fr(j, delta_y) * Frac(k, delta_z); + z_frac = Frac(i, delta_x) * Frac(j, delta_y) * D_Fr(k, delta_z); + + px = x_frac * feedback_momentum; + py = y_frac * feedback_momentum; + pz = z_frac * feedback_momentum; + d = (abs(x_frac) + abs(y_frac) + abs(z_frac)) / 6 * feedback_density + + n_0 * supernova::MU * MP / DENSITY_UNIT; + + // d = frac(i, delta_x) * frac(j, delta_y) * frac(k, delta_z) + // * feedback_density; e = frac(i, delta_x) * frac(j, + // delta_y) * frac(k, delta_z) * feedback_energy; + // kernel_printf("(%d, %d, %d): delta:(%.4e, 
%.4e, %.4e), + // frac: %.4e\n", indx_x, indx_y, indx_z, delta_x, delta_y, + // delta_z, frac(i, delta_x)*frac(j, delta_y)*frac(k, + // delta_z)); kernel_printf("(%d, %d, %d):(%d SN) (i:%d, j:%d, + // k:%d) before: %.4e\n", indx_x, indx_y, indx_z, N, i, j, k, + // density[indx]*DENSITY_UNIT/0.6/MP); + + // v_1 = sqrt((momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx])*VELOCITY_UNIT/1e5; + // t_b = gasEnergy[indx]*ENERGY_UNIT*(gamma - + // 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); p_b = + // sqrt(momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; d_b = + // density[indx]*DENSITY_UNIT/0.6/MP; + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ub: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ub: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ub: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + + atomicAdd(&momentum_x[indx], px); + atomicAdd(&momentum_y[indx], py); + atomicAdd(&momentum_z[indx], pz); + + density[indx] = d; + energy[indx] = (momentum_x[indx] * momentum_x[indx] + momentum_y[indx] * momentum_y[indx] + + momentum_z[indx] * momentum_z[indx]) / + 2 / density[indx] + + gasEnergy[indx]; + + // atomicAdd( 
&energy[indx], e ); + // atomicAdd( &density[indx], d ); + + s_info[FEED_INFO_N * tid + I_UNRES_ENERGY] += + direction * (px * px + py * py + pz * pz) / 2 / density[indx] * dV; + + if (abs(momentum_x[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ua: (%d, %d, %d) vx = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_x[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_y[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ua: (%d, %d, %d) vy = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_y[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + if (abs(momentum_z[indx] / density[indx]) >= C_L) { + kernel_printf( + "%d, Ua: (%d, %d, %d) vz = %.3e, d = %.3e, n_0 = " + "%.3e\n", + direction, indx_x + i, indx_y + j, indx_z + k, + momentum_z[indx] / density[indx] * VELOCITY_UNIT * 1e-5, + density[indx] * DENSITY_UNIT / 0.6 / MP, n_0); + } + // gasEnergy[indx] = energy[indx] - + // (momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])/2/density[indx]; v_2 = + // sqrt((momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])/density[indx]/density[indx]) + // * VELOCITY_UNIT/1e5; t_a = + // gasEnergy[indx]*ENERGY_UNIT*(gamma - + // 1)/(density[indx]*DENSITY_UNIT/0.6/MP*KB); d_a = + // density[indx]*DENSITY_UNIT/0.6/MP; p_a = + // sqrt(momentum_x[indx]*momentum_x[indx] + + // momentum_y[indx]*momentum_y[indx] + + // momentum_z[indx]*momentum_z[indx])*VELOCITY_UNIT/1e5; + + // kernel_printf("(%d, %d, %d):(CM: %.2e, SN: %d) (i:%d, j:%d, + // k:%d) v_1: %.5e v_2: %.5e V_DIFF-> %.4f %%\n", indx_x, + // indx_y, indx_z, mass_dev[gtid], N, i, j, k, v_1, v_2, + // (v_2-v_1)/v_1*100); kernel_printf(" (%d, %d, %d):(%d SN) + // (i:%d, j:%d, 
k:%d) T_b: %.5e T_a: %.5e T_DIFF-> %.4f + // %%\n", indx_x, indx_y, indx_z, N, i, j, k, t_b, t_a, + // (t_a-t_b)/t_b*100); kernel_printf(" (%d, %d, %d):(%d + // SN) (i:%d, j:%d, k:%d) d_b: %.5e d_a: %.5e D_DIFF-> %.1f + // %%\n", indx_x, indx_y, indx_z, N, i, j, k, d_b, d_a, + // (d_a-d_b)/d_b*100); kernel_printf(" (%d, %d, + // %d):(%d SN) (i:%d, j:%d, k:%d) p_b: %.5e p_a: %.5e P_DIFF-> + // %.4f + // %%\n", indx_x, indx_y, indx_z, N, i, j, k, p_b, p_a, + // (p_a-p_b)/p_b*100); + + if (direction > 0) { + // kernel_printf("urs time:%.3e id:%d N:%d d:%.5e\n", t, + // id[gtid], N, n_0); + local_dti = fmax(local_dti, Calc_Timestep(gamma, density, momentum_x, momentum_y, momentum_z, + energy, indx, dx, dy, dz, density_floor)); + } + } + } + } + } + if (direction > 0) { + atomicMax(dti, local_dti); + } + } + } + } + } + + __syncthreads(); + + // reduce the info from all the threads in the block + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + s_info[FEED_INFO_N * tid] += s_info[FEED_INFO_N * (tid + s)]; + s_info[FEED_INFO_N * tid + 1] += s_info[FEED_INFO_N * (tid + s) + 1]; + s_info[FEED_INFO_N * tid + 2] += s_info[FEED_INFO_N * (tid + s) + 2]; + s_info[FEED_INFO_N * tid + 3] += s_info[FEED_INFO_N * (tid + s) + 3]; + s_info[FEED_INFO_N * tid + 4] += s_info[FEED_INFO_N * (tid + s) + 4]; + s_info[FEED_INFO_N * tid + 5] += s_info[FEED_INFO_N * (tid + s) + 5]; + } + __syncthreads(); + } + + if (tid == 0) { + info[FEED_INFO_N * blockIdx.x] = s_info[0]; + info[FEED_INFO_N * blockIdx.x + 1] = s_info[1]; + info[FEED_INFO_N * blockIdx.x + 2] = s_info[2]; + info[FEED_INFO_N * blockIdx.x + 3] = s_info[3]; + info[FEED_INFO_N * blockIdx.x + 4] = s_info[4]; + info[FEED_INFO_N * blockIdx.x + 5] = s_info[5]; + } +} + +Real supernova::Cluster_Feedback(Grid3D& G, FeedbackAnalysis& analysis) +{ + #ifdef CPU_TIME + G.Timer.Feedback.Start(); + #endif + + if (G.H.dt == 0) { + return 0.0; + } + + /* + if (G.Particles.n_local > supernova::n_states) { + 
printf("ERROR: not enough cuRAND states (%ld) for %ld local particles\n", + supernova::n_states, G.Particles.n_local); + exit(-1); + } + */ + + Real h_dti = 0.0; + int direction, ngrid; + Real h_info[6] = {0, 0, 0, 0, 0, 0}; + Real info[6]; + Real *d_dti, *d_info; + // require d_prev_dens & d_prev_N in case we have to undo feedback if the time + // step is too large. + Real* d_prev_dens; + int* d_prev_N; + + if (G.Particles.n_local > 0) { + GPU_Error_Check(cudaMalloc(&d_dti, sizeof(Real))); + GPU_Error_Check(cudaMemcpy(d_dti, &h_dti, sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMalloc(&d_prev_dens, G.Particles.n_local * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&d_prev_N, G.Particles.n_local * sizeof(int))); + GPU_Error_Check(cudaMemset(d_prev_dens, 0, G.Particles.n_local * sizeof(Real))); + GPU_Error_Check(cudaMemset(d_prev_N, 0, G.Particles.n_local * sizeof(int))); + + ngrid = std::ceil((1. * G.Particles.n_local) / TPB_FEEDBACK); + GPU_Error_Check(cudaMalloc((void**)&d_info, FEED_INFO_N * ngrid * sizeof(Real))); + } + // TODO: info collection and max dti calculation + // assumes ngrid is 1. The reason being that reduction of + // d_info is currently done on each block. 
Only the first block reduction + // is used + + do { + direction = 1; + if (G.Particles.n_local > 0) { + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, + G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, + G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, + G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, + G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy, + G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, + supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, + time_sn_end, G.H.n_step, G.H.density_floor); + + GPU_Error_Check(cudaMemcpy(&h_dti, d_dti, sizeof(Real), cudaMemcpyDeviceToHost)); + } + + #ifdef MPI_CHOLLA + h_dti = ReduceRealMax(h_dti); + MPI_Barrier(world); + #endif // MPI_CHOLLA + + if (h_dti != 0 && (C_cfl / h_dti < G.H.dt)) { + // timestep too big: need to undo the last operation + direction = -1; + if (G.Particles.n_local > 0) { + hipLaunchKernelGGL(Cluster_Feedback_Kernel, ngrid, TPB_FEEDBACK, 0, 0, G.Particles.n_local, + G.Particles.partIDs_dev, G.Particles.pos_x_dev, G.Particles.pos_y_dev, G.Particles.pos_z_dev, + G.Particles.mass_dev, G.Particles.age_dev, G.H.xblocal, G.H.yblocal, G.H.zblocal, + G.H.xblocal_max, G.H.yblocal_max, G.H.zblocal_max, G.H.dx, G.H.dy, G.H.dz, G.H.nx, G.H.ny, + G.H.nz, G.H.n_ghost, G.H.t, G.H.dt, d_dti, d_info, G.C.d_density, G.C.d_GasEnergy, + G.C.d_Energy, G.C.d_momentum_x, G.C.d_momentum_y, G.C.d_momentum_z, gama, + supernova::randStates, d_prev_dens, d_prev_N, direction, dev_snr, snr_dt, time_sn_start, + time_sn_end, G.H.n_step, G.H.density_floor); + + GPU_Error_Check(cudaDeviceSynchronize()); + } + G.H.dt = C_cfl / h_dti; + } + + } while (direction == -1); + + if (G.Particles.n_local > 0) { + GPU_Error_Check(cudaMemcpy(&h_info, d_info, FEED_INFO_N * sizeof(Real), 
cudaMemcpyDeviceToHost)); + GPU_Error_Check(cudaFree(d_dti)); + GPU_Error_Check(cudaFree(d_info)); + GPU_Error_Check(cudaFree(d_prev_dens)); + GPU_Error_Check(cudaFree(d_prev_N)); + } + + #ifdef MPI_CHOLLA + MPI_Reduce(&h_info, &info, FEED_INFO_N, MPI_CHREAL, MPI_SUM, root, world); + #else + info = h_info; + #endif + + analysis.countSN += (int)info[supernova::SN]; + analysis.countResolved += (int)info[supernova::RESOLVED]; + analysis.countUnresolved += (int)info[supernova::NOT_RESOLVED]; + analysis.totalEnergy += info[supernova::ENERGY]; + analysis.totalMomentum += info[supernova::MOMENTUM]; + analysis.totalUnresEnergy += info[supernova::UNRES_ENERGY]; + + Real resolved_ratio = 0.0; + if (info[supernova::RESOLVED] > 0 || info[supernova::NOT_RESOLVED] > 0) { + resolved_ratio = info[supernova::RESOLVED] / (info[supernova::RESOLVED] + info[supernova::NOT_RESOLVED]); + } + Real global_resolved_ratio = 0.0; + if (analysis.countResolved > 0 || analysis.countUnresolved > 0) { + global_resolved_ratio = (Real)(analysis.countResolved) / (Real)(analysis.countResolved + analysis.countUnresolved); + } + + chprintf("iteration %d: number of SN: %d, ratio of resolved %.3e\n", G.H.n_step, (long)info[supernova::SN], + resolved_ratio); + chprintf( + " this iteration: energy: %.5e erg. momentum: %.5e S.M. km/s " + "unres_energy: %.5e erg\n", + info[supernova::ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT, + info[supernova::MOMENTUM] * VELOCITY_UNIT / 1e5, + info[supernova::UNRES_ENERGY] * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT); + chprintf(" cummulative: #SN: %d, ratio of resolved (R: %d, UR: %d) = %.3e\n", (long)analysis.countSN, + (long)analysis.countResolved, (long)analysis.countUnresolved, global_resolved_ratio); + chprintf( + " energy: %.5e erg. Total momentum: %.5e S.M. 
km/s, Total unres " + "energy: %.5e\n", + analysis.totalEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT, + analysis.totalMomentum * VELOCITY_UNIT / 1e5, + analysis.totalUnresEnergy * MASS_UNIT * LENGTH_UNIT * LENGTH_UNIT / TIME_UNIT / TIME_UNIT); + + #ifdef CPU_TIME + G.Timer.Feedback.End(); + #endif + + return h_dti; +} + +#endif // SUPERNOVA & PARTICLES_GPU & PARTICLE_IDS & PARTICLE_AGE diff --git a/src/particles/gravity_CIC.cpp b/src/particles/gravity_CIC.cpp index be28d06a1..495e7cf33 100644 --- a/src/particles/gravity_CIC.cpp +++ b/src/particles/gravity_CIC.cpp @@ -1,121 +1,121 @@ #ifdef PARTICLES -#include -#include -#include "math.h" -#include -#include "../global/global.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "../particles/particles_3D.h" -#include "../particles/density_CIC.h" -#include "../model/disk_galaxy.h" + #include + #include + #include -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../model/disk_galaxy.h" + #include "density_CIC.h" + #include "math.h" + #include "particles_3D.h" -//Get the Gravitational Field from the potential: g=-gradient(potential) -void Grid3D::Get_Gravity_Field_Particles(){ + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif +// Get the Gravitational Field from the potential: g=-gradient(potential) +void Grid3D::Get_Gravity_Field_Particles() +{ #ifdef PARTICLES_CPU - - #ifdef GRAVITY_GPU + + #ifdef GRAVITY_GPU Copy_Potential_From_GPU(); - #endif + #endif - #ifndef PARALLEL_OMP - Get_Gravity_Field_Particles_function( 0, Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid); - #else + #ifndef PARALLEL_OMP + Get_Gravity_Field_Particles_function(0, Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid); + #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; 
int g_start, g_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Grid_Indxs( Particles.G.nz_local + 2*Particles.G.n_ghost_particles_grid, N_OMP_THREADS, omp_id, &g_start, &g_end ); + Get_OMP_Grid_Indxs(Particles.G.nz_local + 2 * Particles.G.n_ghost_particles_grid, N_OMP_THREADS, omp_id, &g_start, + &g_end); - Get_Gravity_Field_Particles_function( g_start, g_end); + Get_Gravity_Field_Particles_function(g_start, g_end); } - #endif//PARALLEL_OMP - #endif//PARTICLES_CPU - + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU - Particles.Get_Gravity_Field_Particles_GPU( Grav.F.potential_h ); + Particles.Get_Gravity_Field_Particles_GPU(Grav.F.potential_h); #endif - } - -void Grid3D::Get_Gravity_CIC(){ - +void Grid3D::Get_Gravity_CIC() +{ #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - Get_Gravity_CIC_function( 0, Particles.n_local ); - #else + #ifndef PARALLEL_OMP + Get_Gravity_CIC_function(0, Particles.n_local); + #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); - Get_Gravity_CIC_function( p_start, p_end ); + Get_Gravity_CIC_function(p_start, p_end); } - #endif//PARALLEL_OMP - #endif//PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU Particles.Get_Gravity_CIC_GPU(); #endif } - -#ifdef PARTICLES_GPU -void Particles_3D::Get_Gravity_Field_Particles_GPU( Real *potential_host ){ - - Get_Gravity_Field_Particles_GPU_function( G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, G.gravity_x_dev, 
G.gravity_y_dev, G.gravity_z_dev ); - + #ifdef PARTICLES_GPU +void Particles3D::Get_Gravity_Field_Particles_GPU(Real *potential_host) +{ + Get_Gravity_Field_Particles_GPU_function(G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, + G.n_cells_potential, G.dx, G.dy, G.dz, potential_host, G.potential_dev, + G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } -void Particles_3D::Get_Gravity_CIC_GPU(){ - - Get_Gravity_CIC_GPU_function( n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev ); +void Particles3D::Get_Gravity_CIC_GPU() +{ + Get_Gravity_CIC_GPU_function(n_local, G.nx_local, G.ny_local, G.nz_local, G.n_ghost_particles_grid, G.xMin, G.xMax, + G.yMin, G.yMax, G.zMin, G.zMax, G.dx, G.dy, G.dz, pos_x_dev, pos_y_dev, pos_z_dev, + grav_x_dev, grav_y_dev, grav_z_dev, G.gravity_x_dev, G.gravity_y_dev, G.gravity_z_dev); } -#endif //PARTICLES_GPU - - -#ifdef PARTICLES_CPU + #endif // PARTICLES_GPU -//Compute the gradient of the potential -void Grid3D::Get_Gravity_Field_Particles_function( int g_start, int g_end ){ + #ifdef PARTICLES_CPU +// Compute the gradient of the potential +void Grid3D::Get_Gravity_Field_Particles_function(int g_start, int g_end) +{ int nx_grav, ny_grav, nz_grav, nGHST_grav; nGHST_grav = Particles.G.n_ghost_particles_grid; - nx_grav = Particles.G.nx_local + 2*nGHST_grav; - ny_grav = Particles.G.ny_local + 2*nGHST_grav; - nz_grav = Particles.G.nz_local + 2*nGHST_grav; + nx_grav = Particles.G.nx_local + 2 * nGHST_grav; + ny_grav = Particles.G.ny_local + 2 * nGHST_grav; + nz_grav = Particles.G.nz_local + 2 * nGHST_grav; int nx_grid, ny_grid, nz_grid, nGHST_grid; Real *potential; - potential = Grav.F.potential_h; + potential = Grav.F.potential_h; nGHST_grid = N_GHOST_POTENTIAL; - nx_grid = Grav.nx_local + 2*nGHST_grid; - ny_grid = 
Grav.ny_local + 2*nGHST_grid; - nz_grid = Grav.nz_local + 2*nGHST_grid; + nx_grid = Grav.nx_local + 2 * nGHST_grid; + ny_grid = Grav.ny_local + 2 * nGHST_grid; + nz_grid = Grav.nz_local + 2 * nGHST_grid; int nGHST = nGHST_grid - nGHST_grav; @@ -124,96 +124,97 @@ void Grid3D::Get_Gravity_Field_Particles_function( int g_start, int g_end ){ dy = Particles.G.dy; dz = Particles.G.dz; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT Real phi_ll, phi_rr; int id_ll, id_rr; - #endif + #endif Real phi_l, phi_r; int k, j, i, id_l, id_r, id; - for ( k=g_start; k -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../particles/particles_3D.h" - -#ifdef GRAVITY_GPU -#include "../grid/grid3D.h" -#endif - -#ifdef PARTICLES_GPU - -//Copy the potential from host to device -void Particles_3D::Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential ){ - CudaSafeCall( cudaMemcpy( potential_dev, potential_host, n_cells_potential*sizeof(Real), cudaMemcpyHostToDevice) ); -} + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" + + #ifdef GRAVITY_GPU + #include "../grid/grid3D.h" + #endif + #ifdef PARTICLES_GPU -//Kernel to compute the gradient of the potential -__global__ void Get_Gravity_Field_Particles_Kernel( Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, int nx, int ny, int nz, int n_ghost_particles_grid, int n_ghost_potential, Real dx, Real dy, Real dz ){ +// Copy the potential from host to device +void Particles3D::Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential) +{ + GPU_Error_Check(cudaMemcpy(potential_dev, potential_host, n_cells_potential * sizeof(Real), cudaMemcpyHostToDevice)); +} +// Kernel to compute the gradient of the potential +__global__ void 
Get_Gravity_Field_Particles_Kernel(Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev, int nx, int ny, int nz, + int n_ghost_particles_grid, int n_ghost_potential, Real dx, Real dy, + Real dz) +{ int tid_x = blockIdx.x * blockDim.x + threadIdx.x; int tid_y = blockIdx.y * blockDim.y + threadIdx.y; int tid_z = blockIdx.z * blockDim.z + threadIdx.z; int nx_grav, ny_grav, nz_grav; - nx_grav = nx + 2*n_ghost_particles_grid; - ny_grav = ny + 2*n_ghost_particles_grid; - nz_grav = nz + 2*n_ghost_particles_grid; + nx_grav = nx + 2 * n_ghost_particles_grid; + ny_grav = ny + 2 * n_ghost_particles_grid; + nz_grav = nz + 2 * n_ghost_particles_grid; - if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav ) return; - int tid = tid_x + tid_y*nx_grav + tid_z*nx_grav*ny_grav; + if (tid_x >= nx_grav || tid_y >= ny_grav || tid_z >= nz_grav) { + return; + } + int tid = tid_x + tid_y * nx_grav + tid_z * nx_grav * ny_grav; int nx_pot, ny_pot; - nx_pot = nx + 2*n_ghost_potential; - ny_pot = ny + 2*n_ghost_potential; - - // if (tid == 0) printf( "potential: %f\n", potential_dev[tid] ); + nx_pot = nx + 2 * n_ghost_potential; + ny_pot = ny + 2 * n_ghost_potential; int nGHST = n_ghost_potential - n_ghost_particles_grid; Real phi_l, phi_r; int id_l, id_r; - #ifdef GRAVITY_5_POINTS_GRADIENT + #ifdef GRAVITY_5_POINTS_GRADIENT Real phi_ll, phi_rr; int id_ll, id_rr; - #endif + #endif // Get Potential Gradient X - id_l = (tid_x-1 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_r = (tid_x+1 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; + id_l = (tid_x - 1 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + 1 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x-2 + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_rr = (tid_x+2 + 
nGHST) + (tid_y + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - phi_ll = potential_dev[id_ll]; - phi_rr = potential_dev[id_rr]; - gravity_x_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dx); - #else - gravity_x_dev[tid] = -0.5 * ( phi_r - phi_l ) / dx; - #endif + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (tid_x - 2 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + 2 + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + phi_ll = potential_dev[id_ll]; + phi_rr = potential_dev[id_rr]; + gravity_x_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dx); + #else + gravity_x_dev[tid] = -0.5 * (phi_r - phi_l) / dx; + #endif // Get Potential Gradient Y - id_l = (tid_x + nGHST) + (tid_y-1 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_r = (tid_x + nGHST) + (tid_y+1 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; + id_l = (tid_x + nGHST) + (tid_y - 1 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + nGHST) + (tid_y + 1 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x + nGHST) + (tid_y-2 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - id_rr = (tid_x + nGHST) + (tid_y+2 + nGHST)*nx_pot + (tid_z + nGHST)*ny_pot*nx_pot; - phi_ll = potential_dev[id_ll]; - phi_rr = potential_dev[id_rr]; - gravity_y_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dy); - #else - gravity_y_dev[tid] = -0.5 * ( phi_r - phi_l ) / dy; - #endif + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (tid_x + nGHST) + (tid_y - 2 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + nGHST) + (tid_y + 2 + nGHST) * nx_pot + (tid_z + nGHST) * ny_pot * nx_pot; + phi_ll = potential_dev[id_ll]; + phi_rr = potential_dev[id_rr]; + gravity_y_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dy); + #else + gravity_y_dev[tid] = -0.5 * 
(phi_r - phi_l) / dy; + #endif // Get Potential Gradient Z - id_l = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z-1 + nGHST)*ny_pot*nx_pot; - id_r = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z+1 + nGHST)*ny_pot*nx_pot; + id_l = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z - 1 + nGHST) * ny_pot * nx_pot; + id_r = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + 1 + nGHST) * ny_pot * nx_pot; phi_l = potential_dev[id_l]; phi_r = potential_dev[id_r]; - #ifdef GRAVITY_5_POINTS_GRADIENT - id_ll = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z-2 + nGHST)*ny_pot*nx_pot; - id_rr = (tid_x + nGHST) + (tid_y + nGHST)*nx_pot + (tid_z+2 + nGHST)*ny_pot*nx_pot; - phi_ll = potential_dev[id_ll]; - phi_rr = potential_dev[id_rr]; - gravity_z_dev[tid] = -1 * ( -phi_rr + 8*phi_r - 8*phi_l + phi_ll) / (12*dz); - #else - gravity_z_dev[tid] = -0.5 * ( phi_r - phi_l ) / dz; - #endif - + #ifdef GRAVITY_5_POINTS_GRADIENT + id_ll = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z - 2 + nGHST) * ny_pot * nx_pot; + id_rr = (tid_x + nGHST) + (tid_y + nGHST) * nx_pot + (tid_z + 2 + nGHST) * ny_pot * nx_pot; + phi_ll = potential_dev[id_ll]; + phi_rr = potential_dev[id_rr]; + gravity_z_dev[tid] = -1 * (-phi_rr + 8 * phi_r - 8 * phi_l + phi_ll) / (12 * dz); + #else + gravity_z_dev[tid] = -0.5 * (phi_r - phi_l) / dz; + #endif } - -//Call the kernel to compute the gradient of the potential -void Particles_3D::Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ){ - - #ifndef GRAVITY_GPU - Copy_Potential_To_GPU( potential_host, potential_dev, n_cells_potential ); - #endif +// Call the kernel to compute the gradient of the potential +void Particles3D::Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, 
int n_cells_potential, Real dx, + Real dy, Real dz, Real *potential_host, Real *potential_dev, + Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) +{ + #ifndef GRAVITY_GPU + Copy_Potential_To_GPU(potential_host, potential_dev, n_cells_potential); + #endif int nx_g, ny_g, nz_g; - nx_g = nx_local + 2*N_GHOST_POTENTIAL; - ny_g = ny_local + 2*N_GHOST_POTENTIAL; - nz_g = nz_local + 2*N_GHOST_POTENTIAL; + nx_g = nx_local + 2 * N_GHOST_POTENTIAL; + ny_g = ny_local + 2 * N_GHOST_POTENTIAL; + nz_g = nz_local + 2 * N_GHOST_POTENTIAL; // set values for GPU kernels - int tpb_x = 8; - int tpb_y = 8; - int tpb_z = 8; - int ngrid_x = (nx_g + tpb_x - 1) / tpb_x; - int ngrid_y = (ny_g + tpb_y - 1) / tpb_y; - int ngrid_z = (nz_g + tpb_z - 1) / tpb_z; + int tpb_x = 8; + int tpb_y = 8; + int tpb_z = 8; + int ngrid_x = (nx_g + tpb_x - 1) / tpb_x; + int ngrid_y = (ny_g + tpb_y - 1) / tpb_y; + int ngrid_z = (nz_g + tpb_z - 1) / tpb_z; // number of blocks per 1D grid dim3 dim3dGrid(ngrid_x, ngrid_y, ngrid_z); // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - - hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, N_GHOST_POTENTIAL, dx, dy, dz ); - CudaCheckError(); + hipLaunchKernelGGL(Get_Gravity_Field_Particles_Kernel, dim3dGrid, dim3dBlock, 0, 0, potential_dev, gravity_x_dev, + gravity_y_dev, gravity_z_dev, nx_local, ny_local, nz_local, n_ghost_particles_grid, + N_GHOST_POTENTIAL, dx, dy, dz); + GPU_Error_Check(); } - -//Get CIC indexes from the particles positions -__device__ void Get_Indexes_CIC_Gravity( Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z ){ - indx_x = (int) floor( ( pos_x - xMin - 0.5*dx ) / dx ); - indx_y = (int) floor( ( pos_y - yMin - 0.5*dy ) / dy ); - indx_z = (int) floor( ( pos_z - zMin - 0.5*dz ) / 
dz ); +// Get CIC indexes from the particles positions +__device__ void Get_Indexes_CIC_Gravity(Real xMin, Real yMin, Real zMin, Real dx, Real dy, Real dz, Real pos_x, + Real pos_y, Real pos_z, int &indx_x, int &indx_y, int &indx_z) +{ + indx_x = (int)floor((pos_x - xMin - 0.5 * dx) / dx); + indx_y = (int)floor((pos_y - yMin - 0.5 * dy) / dy); + indx_z = (int)floor((pos_z - zMin - 0.5 * dz) / dz); } -//Kernel to compute the gravitational field at the particles positions via Cloud-In-Cell -__global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, int ny, int nz, int n_ghost ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - - if ( tid >= n_local) return; +// Kernel to compute the gravitational field at the particles positions via +// Cloud-In-Cell +__global__ void Get_Gravity_CIC_Kernel(part_int_t n_local, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real xMin, Real yMin, + Real zMin, Real xMax, Real yMax, Real zMax, Real dx, Real dy, Real dz, int nx, + int ny, int nz, int n_ghost) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid >= n_local) { + return; + } int nx_g, ny_g; - nx_g = nx + 2*n_ghost; - ny_g = ny + 2*n_ghost; + nx_g = nx + 2 * n_ghost; + ny_g = ny + 2 * n_ghost; Real pos_x, pos_y, pos_z; Real cell_center_x, cell_center_y, cell_center_z; @@ -159,97 +174,106 @@ __global__ void Get_Gravity_CIC_Kernel( part_int_t n_local, Real *gravity_x_dev, pos_y = pos_y_dev[tid]; pos_z = pos_z_dev[tid]; - int indx_x, indx_y, indx_z, indx; - Get_Indexes_CIC_Gravity( xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z ); + 
Get_Indexes_CIC_Gravity(xMin, yMin, zMin, dx, dy, dz, pos_x, pos_y, pos_z, indx_x, indx_y, indx_z); bool in_local = true; - if ( pos_x < xMin || pos_x >= xMax ) in_local = false; - if ( pos_y < yMin || pos_y >= yMax ) in_local = false; - if ( pos_z < zMin || pos_z >= zMax ) in_local = false; - if ( ! in_local ) { + if (pos_x < xMin || pos_x >= xMax) { + in_local = false; + } + if (pos_y < yMin || pos_y >= yMax) { + in_local = false; + } + if (pos_z < zMin || pos_z >= zMax) { + in_local = false; + } + if (!in_local) { printf(" Gravity CIC Error: Particle outside local domain"); return; } - cell_center_x = xMin + indx_x*dx + 0.5*dx; - cell_center_y = yMin + indx_y*dy + 0.5*dy; - cell_center_z = zMin + indx_z*dz + 0.5*dz; - delta_x = 1 - ( pos_x - cell_center_x ) / dx; - delta_y = 1 - ( pos_y - cell_center_y ) / dy; - delta_z = 1 - ( pos_z - cell_center_z ) / dz; + cell_center_x = xMin + indx_x * dx + 0.5 * dx; + cell_center_y = yMin + indx_y * dy + 0.5 * dy; + cell_center_z = zMin + indx_z * dz + 0.5 * dz; + delta_x = 1 - (pos_x - cell_center_x) / dx; + delta_y = 1 - (pos_y - cell_center_y) / dy; + delta_z = 1 - (pos_z - cell_center_z) / dz; indx_x += n_ghost; indx_y += n_ghost; indx_z += n_ghost; - indx = indx_x + indx_y*nx_g + indx_z*nx_g*ny_g; + indx = indx_x + indx_y * nx_g + indx_z * nx_g * ny_g; g_x_bl = gravity_x_dev[indx]; g_y_bl = gravity_y_dev[indx]; g_z_bl = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y)*nx_g + (indx_z)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y)*nx_g + (indx_z)*nx_g * ny_g; g_x_br = gravity_x_dev[indx]; g_y_br = gravity_y_dev[indx]; g_z_br = gravity_z_dev[indx]; - indx = (indx_x) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g; + indx = (indx_x) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g; g_x_bu = gravity_x_dev[indx]; g_y_bu = gravity_y_dev[indx]; g_z_bu = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z)*nx_g * ny_g; g_x_bru = gravity_x_dev[indx]; g_y_bru 
= gravity_y_dev[indx]; g_z_bru = gravity_z_dev[indx]; - indx = (indx_x) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tl = gravity_x_dev[indx]; g_y_tl = gravity_y_dev[indx]; g_z_tl = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y)*nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tr = gravity_x_dev[indx]; g_y_tr = gravity_y_dev[indx]; g_z_tr = gravity_z_dev[indx]; - indx = (indx_x) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tu = gravity_x_dev[indx]; g_y_tu = gravity_y_dev[indx]; g_z_tu = gravity_z_dev[indx]; - indx = (indx_x+1) + (indx_y+1)*nx_g + (indx_z+1)*nx_g*ny_g; + indx = (indx_x + 1) + (indx_y + 1) * nx_g + (indx_z + 1) * nx_g * ny_g; g_x_tru = gravity_x_dev[indx]; g_y_tru = gravity_y_dev[indx]; g_z_tru = gravity_z_dev[indx]; - g_x = g_x_bl*(delta_x)*(delta_y)*(delta_z) + g_x_br*(1-delta_x)*(delta_y)*(delta_z) + - g_x_bu*(delta_x)*(1-delta_y)*(delta_z ) + g_x_bru*(1-delta_x)*(1-delta_y)*(delta_z) + - g_x_tl*(delta_x)*(delta_y)*(1-delta_z) + g_x_tr*(1-delta_x)*(delta_y)*(1-delta_z) + - g_x_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_x_tru*(1-delta_x)*(1-delta_y)*(1-delta_z); + g_x = g_x_bl * (delta_x) * (delta_y) * (delta_z) + g_x_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_x_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_x_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_x_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_x_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_x_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_x_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); - g_y = g_y_bl*(delta_x)*(delta_y)*(delta_z) + g_y_br*(1-delta_x)*(delta_y)*(delta_z) + - g_y_bu*(delta_x)*(1-delta_y)*(delta_z) + g_y_bru*(1-delta_x)*(1-delta_y)*(delta_z) + - g_y_tl*(delta_x)*(delta_y)*(1-delta_z) + g_y_tr*(1-delta_x)*(delta_y)*(1-delta_z) + - 
g_y_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_y_tru*(1-delta_x)*(1-delta_y)*(1-delta_z); + g_y = g_y_bl * (delta_x) * (delta_y) * (delta_z) + g_y_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_y_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_y_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_y_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_y_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_y_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_y_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); - g_z = g_z_bl*(delta_x)*(delta_y)*(delta_z) + g_z_br*(1-delta_x)*(delta_y)*(delta_z) + - g_z_bu*(delta_x)*(1-delta_y)*(delta_z) + g_z_bru*(1-delta_x)*(1-delta_y)*(delta_z) + - g_z_tl*(delta_x)*(delta_y)*(1-delta_z) + g_z_tr*(1-delta_x)*(delta_y)*(1-delta_z) + - g_z_tu*(delta_x)*(1-delta_y)*(1-delta_z) + g_z_tru*(1-delta_x)*(1-delta_y)*(1-delta_z); + g_z = g_z_bl * (delta_x) * (delta_y) * (delta_z) + g_z_br * (1 - delta_x) * (delta_y) * (delta_z) + + g_z_bu * (delta_x) * (1 - delta_y) * (delta_z) + g_z_bru * (1 - delta_x) * (1 - delta_y) * (delta_z) + + g_z_tl * (delta_x) * (delta_y) * (1 - delta_z) + g_z_tr * (1 - delta_x) * (delta_y) * (1 - delta_z) + + g_z_tu * (delta_x) * (1 - delta_y) * (1 - delta_z) + g_z_tru * (1 - delta_x) * (1 - delta_y) * (1 - delta_z); grav_x_dev[tid] = g_x; grav_y_dev[tid] = g_y; grav_z_dev[tid] = g_z; - } - -//Call the kernel to compote the gravitational field at the particles positions ( CIC ) -void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ){ - +// Call the kernel to compote the gravitational field at the particles positions +// ( CIC ) +void 
Particles3D::Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -257,45 +281,47 @@ void Particles_3D::Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_loca // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid ); - CudaCheckError(); + hipLaunchKernelGGL(Get_Gravity_CIC_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, gravity_x_dev, gravity_y_dev, + gravity_z_dev, pos_x_dev, pos_y_dev, pos_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, xMin, yMin, + zMin, xMax, yMax, zMax, dx, dy, dz, nx_local, ny_local, nz_local, n_ghost_particles_grid); + GPU_Error_Check(); } - } -#endif //PARTICLES_GPU - -#ifdef GRAVITY_GPU + #endif // PARTICLES_GPU -void __global__ Copy_Particles_Density_Kernel( Real *dst_density, Real *src_density, int nx_local, int ny_local, int nz_local, int n_ghost ){ + #ifdef GRAVITY_GPU +void __global__ Copy_Particles_Density_Kernel(Real *dst_density, Real *src_density, int nx_local, int ny_local, + int nz_local, int n_ghost) +{ int tid_x, tid_y, tid_z, tid_CIC, tid_dens; tid_x = blockIdx.x * blockDim.x + threadIdx.x; tid_y = blockIdx.y * blockDim.y + threadIdx.y; tid_z = blockIdx.z * 
blockDim.z + threadIdx.z; - if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local ) return; + if (tid_x >= nx_local || tid_y >= ny_local || tid_z >= nz_local) { + return; + } - tid_dens = tid_x + tid_y*nx_local + tid_z*nx_local*ny_local; + tid_dens = tid_x + tid_y * nx_local + tid_z * nx_local * ny_local; tid_x += n_ghost; tid_y += n_ghost; tid_z += n_ghost; int nx_CIC, ny_CIC; - nx_CIC = nx_local + 2*n_ghost; - ny_CIC = ny_local + 2*n_ghost; - tid_CIC = tid_x + tid_y*nx_CIC + tid_z*nx_CIC*ny_CIC; + nx_CIC = nx_local + 2 * n_ghost; + ny_CIC = ny_local + 2 * n_ghost; + tid_CIC = tid_x + tid_y * nx_CIC + tid_z * nx_CIC * ny_CIC; dst_density[tid_dens] = src_density[tid_CIC]; - } - - -//Copy the particles density to the density array in Grav to compute the potential -void Grid3D::Copy_Particles_Density_GPU( ){ - +// Copy the particles density to the density array in Grav to compute the +// potential +void Grid3D::Copy_Particles_Density_GPU() +{ int nx_local, ny_local, nz_local, n_ghost; n_ghost = Particles.G.n_ghost_particles_grid; nx_local = Grav.nx_local; @@ -303,9 +329,9 @@ void Grid3D::Copy_Particles_Density_GPU( ){ nz_local = Grav.nz_local; // set values for GPU kernels - int tpb_x = 16; - int tpb_y = 8; - int tpb_z = 8; + int tpb_x = 16; + int tpb_y = 8; + int tpb_z = 8; int ngrid_x = (nx_local - 1) / tpb_x + 1; int ngrid_y = (ny_local - 1) / tpb_y + 1; int ngrid_z = (nz_local - 1) / tpb_z + 1; @@ -314,10 +340,10 @@ void Grid3D::Copy_Particles_Density_GPU( ){ // number of threads per 1D block dim3 dim3dBlock(tpb_x, tpb_y, tpb_z); - hipLaunchKernelGGL( Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.density_d, Particles.G.density_dev, nx_local, ny_local, nz_local, n_ghost ); + hipLaunchKernelGGL(Copy_Particles_Density_Kernel, dim3dGrid, dim3dBlock, 0, 0, Grav.F.density_d, + Particles.G.density_dev, nx_local, ny_local, nz_local, n_ghost); } + #endif // GRAVITY_GPU -#endif//GRAVITY_GPU - -#endif//PARTICLES +#endif // PARTICLES diff --git 
a/src/particles/io_particles.cpp b/src/particles/io_particles.cpp index ad3ee9127..7aaa627d6 100644 --- a/src/particles/io_particles.cpp +++ b/src/particles/io_particles.cpp @@ -1,188 +1,181 @@ #ifdef PARTICLES -#include -#include -#include -#include -#include -#include -#include "../global/global.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "../particles/particles_3D.h" - -#ifdef HDF5 -#include -#endif -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include + #include + #include + #include + #include -// #define OUTPUT_PARTICLES_DATA + #include + #include + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" -void Particles_3D::Load_Particles_Data( struct parameters *P){ - char filename[100]; - char timestep[20]; - int nfile = P->nfile; //output step you want to read from - char filename_counter[100]; - // create the filename to read from + #ifdef HDF5 + #include + #endif + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif - strcpy(filename, P->indir); - sprintf(timestep, "%d_particles", nfile); - strcat(filename,timestep); +// #define OUTPUT_PARTICLES_DATA - #if defined BINARY +void Particles3D::Load_Particles_Data(struct Parameters *P) +{ + #ifndef HDF5 chprintf("\nERROR: Particles only support HDF5 outputs\n"); exit(-1); - #elif defined HDF5 - strcat(filename,".h5"); #endif + // construct the filename to read from #ifdef MPI_CHOLLA - #ifdef TILED_INITIAL_CONDITIONS - sprintf(filename,"%sics_%dMpc_%d_particles.h5", P->indir, (int) P->tile_length/1000, G.nx_local); //Everyone reads the same file - #else - if (strcmp(P->init, "Disk_3D_particles") != 0) sprintf(filename,"%s.%d",filename,procID); - #endif //TILED_INITIAL_CONDITIONS + #ifdef TILED_INITIAL_CONDITIONS + // Every process reads the same file + const std::string base_fname = + ("ics_" + std::to_string((int)P->tile_length / 1000) + "Mpc_" + std::to_string(G.nx_local) + "_particles.h5"); + #else + 
const int nfile = P->nfile; // output step you want to read from + const std::string base_fname = (std::to_string(nfile) + "_particles.h5." + std::to_string(procID)); + #endif // TILED_INITIAL_CONDITIONS #endif - chprintf(" Loading particles file: %s \n", filename ); + const std::string filename = std::string(P->indir) + base_fname; + + chprintf(" Loading particles file: %s \n", filename.c_str()); #ifdef HDF5 - hid_t file_id; - herr_t status; + hid_t file_id; + herr_t status; // open the file - file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); if (file_id < 0) { printf("Unable to open input file.\n"); exit(0); } - Load_Particles_Data_HDF5(file_id, nfile, P ); + Load_Particles_Data_HDF5(file_id, nfile, P); #endif } - -void Grid3D::WriteData_Particles( struct parameters P, int nfile) +void Grid3D::WriteData_Particles(struct Parameters P, int nfile) { // Write the particles data to file - OutputData_Particles( P, nfile); + OutputData_Particles(P, nfile); } + #ifdef HDF5 -#ifdef HDF5 - -void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct parameters *P ) +void Particles3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Parameters *P) { int i, j, k, id, buf_id; - hid_t attribute_id, dataset_id; - Real *dataset_buffer_px; - Real *dataset_buffer_py; - Real *dataset_buffer_pz; - Real *dataset_buffer_vx; - Real *dataset_buffer_vy; - Real *dataset_buffer_vz; - Real *dataset_buffer_m; - #ifdef PARTICLE_AGE - Real *dataset_buffer_age; - #endif - herr_t status; + hid_t attribute_id, dataset_id; + Real *dataset_buffer_px; + Real *dataset_buffer_py; + Real *dataset_buffer_pz; + Real *dataset_buffer_vx; + Real *dataset_buffer_vy; + Real *dataset_buffer_vz; + Real *dataset_buffer_m; + #ifdef PARTICLE_AGE + Real *dataset_buffer_age; + #endif + herr_t status; part_int_t n_to_load, pIndx; attribute_id = H5Aopen(file_id, "n_particles_local", H5P_DEFAULT); - status = 
H5Aread(attribute_id, H5T_NATIVE_LONG, &n_to_load); - status = H5Aclose(attribute_id); + status = H5Aread(attribute_id, H5T_NATIVE_LONG, &n_to_load); + status = H5Aclose(attribute_id); - #ifdef COSMOLOGY + #ifdef COSMOLOGY attribute_id = H5Aopen(file_id, "current_z", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_z); - status = H5Aclose(attribute_id); + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_z); + status = H5Aclose(attribute_id); attribute_id = H5Aopen(file_id, "current_a", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_a); - status = H5Aclose(attribute_id); - #endif + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, ¤t_a); + status = H5Aclose(attribute_id); + #endif - #ifdef SINGLE_PARTICLE_MASS + #ifdef SINGLE_PARTICLE_MASS attribute_id = H5Aopen(file_id, "particle_mass", H5P_DEFAULT); - status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &particle_mass); - status = H5Aclose(attribute_id); - chprintf( " Using Single mass for DM particles: %f Msun/h\n", particle_mass); - #endif + status = H5Aread(attribute_id, H5T_NATIVE_DOUBLE, &particle_mass); + status = H5Aclose(attribute_id); + chprintf(" Using Single mass for DM particles: %f Msun/h\n", particle_mass); + #endif - #ifndef MPI_CHOLLA + #ifndef MPI_CHOLLA chprintf(" Loading %ld particles\n", n_to_load); - #else - if (strcmp(P->init, "Disk_3D_particles") != 0) { - part_int_t n_total_load; - n_total_load = ReducePartIntSum( n_to_load ); - chprintf( " Total Particles To Load: %ld\n", n_total_load ); - } + #else + part_int_t n_total_load; + n_total_load = ReducePartIntSum(n_to_load); + chprintf(" Total Particles To Load: %ld\n", n_total_load); // Print individual n_to_load // for ( int i=0; itile_length; // Rescale the particles position to the global domain chprintf(" Rescaling the Tiled Particles Positions... 
\n"); - chprintf(" Tile length: %f kpc/h \n", tile_length ); - chprintf(" N_Procs Z: %d Y: %d X: %d \n", nproc_z, nproc_y, nproc_x ); + chprintf(" Tile length: %f kpc/h \n", tile_length); + chprintf(" N_Procs Z: %d Y: %d X: %d \n", nproc_z, nproc_y, nproc_x); bool tile_length_difference = false; - if ( fabs( Lx_local - tile_length ) / Lx_local > 1e-2 ) tile_length_difference = true; - if ( fabs( Ly_local - tile_length ) / Ly_local > 1e-2 ) tile_length_difference = true; - if ( fabs( Lz_local - tile_length ) / Lz_local > 1e-2 ) tile_length_difference = true; + if (fabs(Lx_local - tile_length) / Lx_local > 1e-2) tile_length_difference = true; + if (fabs(Ly_local - tile_length) / Ly_local > 1e-2) tile_length_difference = true; + if (fabs(Lz_local - tile_length) / Lz_local > 1e-2) tile_length_difference = true; - if ( tile_length_difference ){ + if (tile_length_difference) { std::cout << " WARNING: Local Domain Length Different to Tile Length " << std::endl; - printf(" Domain Length: [ %f %f %f ]\n", Lz_local, Ly_local, Lx_local ); - printf(" Tile Length: %f \n", tile_length ); + printf(" Domain Length: [ %f %f %f ]\n", Lz_local, Ly_local, Lx_local); + printf(" Tile Length: %f \n", tile_length); } - #endif + #endif - //Loop over to input buffers and load each particle - for( pIndx=0; pIndx G.domainMax_x ){ + if (pPos_x < G.domainMin_x || pPos_x > G.domainMax_x) { std::cout << " Particle outside global domain " << std::endl; } - if ( pPos_y < G.domainMin_y || pPos_y > G.domainMax_y ){ + if (pPos_y < G.domainMin_y || pPos_y > G.domainMax_y) { std::cout << " Particle outside global domain " << std::endl; } - if ( pPos_z < G.domainMin_z || pPos_z > G.domainMax_z ){ + if (pPos_z < G.domainMin_z || pPos_z > G.domainMax_z) { std::cout << " Particle outside global domain " << std::endl; } - if ( pPos_x < G.xMin || pPos_x >= G.xMax ) in_local = false; - if ( pPos_y < G.yMin || pPos_y >= G.yMax ) in_local = false; - if ( pPos_z < G.zMin || pPos_z >= G.zMax ) in_local = false; - 
if ( ! in_local ) { - #ifdef PARTICLE_IDS + if (pPos_x < G.xMin || pPos_x >= G.xMax) { + in_local = false; + } + if (pPos_y < G.yMin || pPos_y >= G.yMax) { + in_local = false; + } + if (pPos_z < G.zMin || pPos_z >= G.zMax) { + in_local = false; + } + if (!in_local) { + #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << pID << std::endl; - #else + #else std::cout << " Particle outside Local domain " << std::endl; - #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + #endif + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << pPos_x << std::endl; std::cout << " Particle Y: " << pPos_y << std::endl; std::cout << " Particle Z: " << pPos_z << std::endl; continue; } - //Keep track of the max and min position and velocity to print Initial Statistics - if ( pPos_x > px_max ) px_max = pPos_x; - if ( pPos_y > py_max ) py_max = pPos_y; - if ( pPos_z > pz_max ) pz_max = pPos_z; + // Keep track of the max and min position and velocity to print Initial + // Statistics + if (pPos_x > px_max) { + px_max = pPos_x; + } + if (pPos_y > py_max) { + py_max = pPos_y; + } + if (pPos_z > pz_max) { + pz_max = pPos_z; + } - if ( pPos_x < px_min ) px_min = pPos_x; - if ( pPos_y < py_min ) py_min = pPos_y; - if ( pPos_z < pz_min ) pz_min = pPos_z; + if (pPos_x < px_min) { + px_min = pPos_x; + } + if (pPos_y < py_min) { + py_min = pPos_y; + } + if (pPos_z < pz_min) { + pz_min = pPos_z; + } - if ( pVel_x > vx_max ) vx_max = pVel_x; - if ( pVel_y > vy_max ) vy_max = pVel_y; - if ( pVel_z > vz_max ) vz_max = pVel_z; + if (pVel_x > vx_max) { + vx_max = pVel_x; + } + if (pVel_y > vy_max) { + vy_max = pVel_y; + } + if (pVel_z > 
vz_max) { + vz_max = pVel_z; + } - if ( pVel_x < vx_min ) vx_min = pVel_x; - if ( pVel_y < vy_min ) vy_min = pVel_y; - if ( pVel_z < vz_min ) vz_min = pVel_z; + if (pVel_x < vx_min) { + vx_min = pVel_x; + } + if (pVel_y < vy_min) { + vy_min = pVel_y; + } + if (pVel_z < vz_min) { + vz_min = pVel_z; + } #ifdef PARTICLES_CPU - //Add the particle data to the particles vectors - pos_x.push_back( pPos_x ); - pos_y.push_back( pPos_y ); - pos_z.push_back( pPos_z ); - vel_x.push_back( pVel_x ); - vel_y.push_back( pVel_y ); - vel_z.push_back( pVel_z ); - grav_x.push_back( 0.0 ); - grav_y.push_back( 0.0 ); - grav_z.push_back( 0.0 ); - #ifndef SINGLE_PARTICLE_MASS - mass.push_back( pMass ); - #endif - #ifdef PARTICLE_IDS + // Add the particle data to the particles vectors + pos_x.push_back(pPos_x); + pos_y.push_back(pPos_y); + pos_z.push_back(pPos_z); + vel_x.push_back(pVel_x); + vel_y.push_back(pVel_y); + vel_z.push_back(pVel_z); + grav_x.push_back(0.0); + grav_y.push_back(0.0); + grav_z.push_back(0.0); + #ifndef SINGLE_PARTICLE_MASS + mass.push_back(pMass); + #endif + #ifdef PARTICLE_IDS partIDs.push_back(pID); - #endif - #ifdef PARTICLE_AGE - age.push_back( pAge ); - #endif - n_local += 1; //Add 1 to the local number of particles - #endif//PARTICLES_CPU + #endif + #ifdef PARTICLE_AGE + age.push_back(pAge); + #endif + n_local += 1; // Add 1 to the local number of particles + #endif // PARTICLES_CPU } - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU // Alocate memory in GPU for particle data // particles_array_size = (part_int_t) n_to_load; - particles_array_size = Compute_Particles_GPU_Array_Size( n_to_load ); - chprintf( " Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, G.gpu_allocation_factor, particles_array_size); - Allocate_Particles_GPU_Array_Real( &pos_x_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &pos_y_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &pos_z_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( 
&vel_x_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &vel_y_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &vel_z_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &grav_x_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &grav_y_dev, particles_array_size); - Allocate_Particles_GPU_Array_Real( &grav_z_dev, particles_array_size); + particles_array_size = Compute_Particles_GPU_Array_Size(n_to_load); + chprintf(" Allocating GPU buffer size: %ld * %f = %ld \n", n_to_load, G.gpu_allocation_factor, particles_array_size); + Allocate_Particles_GPU_Array_Real(&pos_x_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&pos_y_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&pos_z_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&vel_x_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&vel_y_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&vel_z_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&grav_x_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&grav_y_dev, particles_array_size); + Allocate_Particles_GPU_Array_Real(&grav_z_dev, particles_array_size); + #ifndef SINGLE_PARTICLE_MASS + Allocate_Particles_GPU_Array_Real(&mass_dev, particles_array_size); + #endif + #ifdef PARTICLE_IDS + Allocate_Particles_GPU_Array_Part_Int(&partIDs_dev, particles_array_size); + #endif + #ifdef PARTICLE_AGE + Allocate_Particles_GPU_Array_Real(&age_dev, particles_array_size); + #endif + n_local = n_to_load; - chprintf( " Allocated GPU memory for particle data\n"); + chprintf(" Allocated GPU memory for particle data\n"); // printf( " Loaded %ld particles ", n_to_load); - //Copyt the particle data to GPU memory - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_px, pos_x_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_py, pos_y_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( 
dataset_buffer_pz, pos_z_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vx, vel_x_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vy, vel_y_dev, n_local); - Copy_Particles_Array_Real_Host_to_Device( dataset_buffer_vz, vel_z_dev, n_local); - #endif + // Copy the particle data to GPU memory + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_px, pos_x_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_py, pos_y_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_pz, pos_z_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vx, vel_x_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vy, vel_y_dev, n_local); + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_vz, vel_z_dev, n_local); + #ifndef SINGLE_PARTICLE_MASS + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_m, mass_dev, n_local); + #endif + #ifdef PARTICLE_IDS + Copy_Particles_Array_Int_Host_to_Device(dataset_buffer_IDs, partIDs_dev, n_local); + #endif + #ifdef PARTICLE_AGE + Copy_Particles_Array_Real_Host_to_Device(dataset_buffer_age, age_dev, n_local); + #endif + #endif // PARTICLES_GPU - #ifndef MPI_CHOLLA - chprintf( " Loaded %ld particles\n", n_local ); - #else + #ifndef MPI_CHOLLA + chprintf(" Loaded %ld particles\n", n_local); + #else MPI_Barrier(world); part_int_t n_total_loaded; - n_total_loaded = ReducePartIntSum( n_local ); + n_total_loaded = ReducePartIntSum(n_local); n_total_initial = n_total_loaded; - chprintf( " Total Particles Loaded: %ld\n", n_total_loaded ); - #endif + chprintf(" Total Particles Loaded: %ld\n", n_total_loaded); + #endif - #ifdef MPI_CHOLLA - Real px_max_g = ReduceRealMax( px_max ); - Real py_max_g = ReduceRealMax( py_max ); - Real pz_max_g = ReduceRealMax( pz_max ); - Real vx_max_g = ReduceRealMax( vx_max ); - Real vy_max_g = ReduceRealMax( vy_max ); - Real vz_max_g = ReduceRealMax( vz_max ); - - Real px_min_g = 
ReduceRealMin( px_min ); - Real py_min_g = ReduceRealMin( py_min ); - Real pz_min_g = ReduceRealMin( pz_min ); - Real vx_min_g = ReduceRealMin( vx_min ); - Real vy_min_g = ReduceRealMin( vy_min ); - Real vz_min_g = ReduceRealMin( vz_min ); - #else + #ifdef MPI_CHOLLA + Real px_max_g = ReduceRealMax(px_max); + Real py_max_g = ReduceRealMax(py_max); + Real pz_max_g = ReduceRealMax(pz_max); + Real vx_max_g = ReduceRealMax(vx_max); + Real vy_max_g = ReduceRealMax(vy_max); + Real vz_max_g = ReduceRealMax(vz_max); + + Real px_min_g = ReduceRealMin(px_min); + Real py_min_g = ReduceRealMin(py_min); + Real pz_min_g = ReduceRealMin(pz_min); + Real vx_min_g = ReduceRealMin(vx_min); + Real vy_min_g = ReduceRealMin(vy_min); + Real vz_min_g = ReduceRealMin(vz_min); + #else Real px_max_g = px_max; Real py_max_g = py_max; Real pz_max_g = pz_max; @@ -406,45 +451,45 @@ void Particles_3D::Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct par Real vx_min_g = vx_min; Real vy_min_g = vy_min; Real vz_min_g = vz_min; - #endif//MPI_CHOLLA - - //Print initial Statistics - #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) - chprintf( " Pos X Min: %f Max: %f [ kpc/h ]\n", px_min_g, px_max_g); - chprintf( " Pos Y Min: %f Max: %f [ kpc/h ]\n", py_min_g, py_max_g); - chprintf( " Pos Z Min: %f Max: %f [ kpc/h ]\n", pz_min_g, pz_max_g); - chprintf( " Vel X Min: %f Max: %f [ km/s ]\n", vx_min_g, vx_max_g); - chprintf( " Vel Y Min: %f Max: %f [ km/s ]\n", vy_min_g, vy_max_g); - chprintf( " Vel Z Min: %f Max: %f [ km/s ]\n", vz_min_g, vz_max_g); - #endif//PRINT_INITIAL_STATS - - //Free the buffers to used to load the hdf5 files + #endif // MPI_CHOLLA + + // Print initial Statistics + #if defined(PRINT_INITIAL_STATS) && defined(COSMOLOGY) + chprintf(" Pos X Min: %f Max: %f [ kpc/h ]\n", px_min_g, px_max_g); + chprintf(" Pos Y Min: %f Max: %f [ kpc/h ]\n", py_min_g, py_max_g); + chprintf(" Pos Z Min: %f Max: %f [ kpc/h ]\n", pz_min_g, pz_max_g); + chprintf(" Vel X Min: %f Max: %f [ km/s 
]\n", vx_min_g, vx_max_g); + chprintf(" Vel Y Min: %f Max: %f [ km/s ]\n", vy_min_g, vy_max_g); + chprintf(" Vel Z Min: %f Max: %f [ km/s ]\n", vz_min_g, vz_max_g); + #endif // PRINT_INITIAL_STATS + + // Free the buffers to used to load the hdf5 files free(dataset_buffer_px); free(dataset_buffer_py); free(dataset_buffer_pz); free(dataset_buffer_vx); free(dataset_buffer_vy); free(dataset_buffer_vz); - #ifndef SINGLE_PARTICLE_MASS + #ifndef SINGLE_PARTICLE_MASS free(dataset_buffer_m); - #endif - #ifdef PARTICLE_IDS + #endif + #ifdef PARTICLE_IDS free(dataset_buffer_IDs); - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE free(dataset_buffer_age); - #endif + #endif } - /*! \fn void Write_Header_HDF5(hid_t file_id) * \brief Write the relevant header info to the HDF5 file. */ -void Grid3D::Write_Particles_Header_HDF5( hid_t file_id){ - hid_t attribute_id, dataspace_id; - herr_t status; - hsize_t attr_dims; - int int_data[3]; - Real Real_data[3]; +void Grid3D::Write_Particles_Header_HDF5(hid_t file_id) +{ + hid_t attribute_id, dataspace_id; + herr_t status; + hsize_t attr_dims; + int int_data[3]; + Real Real_data[3]; // Single attributes first attr_dims = 1; @@ -455,292 +500,285 @@ void Grid3D::Write_Particles_Header_HDF5( hid_t file_id){ // Write the attribute data status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.t); // Close the attribute - status = H5Aclose(attribute_id); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "dt_particles", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt); - status = H5Aclose(attribute_id); + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.dt); + status = H5Aclose(attribute_id); attribute_id = H5Acreate(file_id, "n_particles_local", H5T_STD_I64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_ULONG, &Particles.n_local); - status = H5Aclose(attribute_id); - + status = 
H5Awrite(attribute_id, H5T_NATIVE_LONG, &Particles.n_local); + status = H5Aclose(attribute_id); - #ifdef SINGLE_PARTICLE_MASS + #ifdef SINGLE_PARTICLE_MASS attribute_id = H5Acreate(file_id, "particle_mass", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass); - status = H5Aclose(attribute_id); - #endif - - #ifdef COSMOLOGY + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Particles.particle_mass); + status = H5Aclose(attribute_id); + #endif + + #ifdef COSMOLOGY attribute_id = H5Acreate(file_id, "current_z", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z); - status = H5Aclose(attribute_id); - + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_z); + status = H5Aclose(attribute_id); + attribute_id = H5Acreate(file_id, "current_a", H5T_IEEE_F64BE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); - status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a); - status = H5Aclose(attribute_id); - #endif + status = H5Awrite(attribute_id, H5T_NATIVE_DOUBLE, &Cosmo.current_a); + status = H5Aclose(attribute_id); + #endif status = H5Sclose(dataspace_id); - } - -void Grid3D::Write_Particles_Data_HDF5( hid_t file_id){ +void Grid3D::Write_Particles_Data_HDF5(hid_t file_id) +{ part_int_t i, j, k, id, buf_id; - hid_t dataset_id, dataspace_id; - Real *dataset_buffer; - part_int_t *dataset_buffer_IDs; - herr_t status; + hid_t dataset_id, dataspace_id; + Real *dataset_buffer; + #ifdef PARTICLE_IDS + part_int_t *dataset_buffer_IDs; + #endif + herr_t status; part_int_t n_local = Particles.n_local; - hsize_t dims[1]; - dataset_buffer = (Real *) malloc(n_local*sizeof(Real)); + hsize_t dims[1]; + dataset_buffer = (Real *)malloc(n_local * sizeof(Real)); bool output_particle_data; - #ifdef OUTPUT_PARTICLES_DATA + #ifdef OUTPUT_PARTICLES_DATA output_particle_data = true; - #else + #else output_particle_data = 
false; - #endif - - #ifdef PARTICLES_GPU - //Copy the device arrays from the device to the host - CudaSafeCall( cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif//PARTICLES_GPU - #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU) - CudaSafeCall( cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential*sizeof(Real), cudaMemcpyDeviceToHost) ); - #endif//OUTPUT_POTENTIAL - + #endif + #ifdef PARTICLES_GPU + // Copy the device arrays from the device to the host + GPU_Error_Check(cudaMemcpy(Particles.G.density, Particles.G.density_dev, Particles.G.n_cells * sizeof(Real), + cudaMemcpyDeviceToHost)); + #endif // PARTICLES_GPU + #if defined(OUTPUT_POTENTIAL) && defined(ONLY_PARTICLES) && defined(GRAVITY_GPU) + GPU_Error_Check(cudaMemcpy(Grav.F.potential_h, Grav.F.potential_d, Grav.n_cells_potential * sizeof(Real), + cudaMemcpyDeviceToHost)); + #endif // OUTPUT_POTENTIAL // Count Current Total Particles part_int_t N_particles_total; - #ifdef MPI_CHOLLA - N_particles_total = ReducePartIntSum( Particles.n_local ); - #else + #ifdef MPI_CHOLLA + N_particles_total = ReducePartIntSum(Particles.n_local); + #else N_particles_total = Particles.n_local; - #endif - - //Print the total particles when saving the particles data - chprintf( " Total Particles: %ld\n", N_particles_total ); + #endif - //Print a warning if the number of particles has changed from the initial number of particles. - //This will indicate an error on the Particles transfers. - if ( N_particles_total != Particles.n_total_initial ) chprintf( " WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total ); + // Print the total particles when saving the particles data + chprintf(" Total Particles: %ld\n", N_particles_total); + // Print a warning if the number of particles has changed from the initial + // number of particles. 
This will indicate an error on the Particles + // transfers. + if (N_particles_total != Particles.n_total_initial) { + chprintf(" WARNING: Lost Particles: %d \n", Particles.n_total_initial - N_particles_total); + } // Create the data space for the datasets - dims[0] = n_local; + dims[0] = n_local; dataspace_id = H5Screate_simple(1, dims, NULL); - //Copy the particles data to the hdf5_buffers and create the data_sets + // Copy the particles data to the hdf5_buffers and create the data_sets - // Copy the pos_x vector to the memory buffer - #ifdef PARTICLES_CPU - for ( i=0; i -#include -#include -#include -#include -#include "../io/io.h" -#include "../grid/grid3D.h" -#include "../utils/prng_utilities.h" -#include "../model/disk_galaxy.h" -#include "../particles/particles_3D.h" -#include "../utils/error_handling.h" - -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif - -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif - -Particles_3D::Particles_3D( void ): - TRANSFER_DENSITY_BOUNDARIES(false), - TRANSFER_PARTICLES_BOUNDARIES(false) -{} - -void Grid3D::Initialize_Particles( struct parameters *P ){ - - chprintf( "\nInitializing Particles...\n"); - - Particles.Initialize( P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal ); - - #if defined (PARTICLES_GPU) && defined (GRAVITY_GPU) - // Set the GPU array for the particles potential equal to the Gravity GPU array for the potential - Particles.G.potential_dev = Grav.F.potential_d; - #endif + #include "particles_3D.h" + + #include - if (strcmp(P->init, "Uniform")==0) Initialize_Uniform_Particles(); + #include + #include + #include + #include + + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../model/disk_galaxy.h" + #include "../utils/error_handling.h" + #include "../utils/prng_utilities.h" #ifdef MPI_CHOLLA - MPI_Barrier( world ); + #include "../mpi/mpi_routines.h" #endif - chprintf( "Particles Initialized Successfully. 
\n\n"); + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif -} +Particles3D::Particles3D(void) : TRANSFER_DENSITY_BOUNDARIES(false), TRANSFER_PARTICLES_BOUNDARIES(false) {} -void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal){ +void Grid3D::Initialize_Particles(struct Parameters *P) +{ + chprintf("\nInitializing Particles...\n"); - //Initialize local and total number of particles to 0 - n_local = 0; - n_total = 0; + Particles.Initialize(P, Grav, H.xbound, H.ybound, H.zbound, H.xdglobal, H.ydglobal, H.zdglobal); + + #if defined(PARTICLES_GPU) && defined(GRAVITY_GPU) + // Set the GPU array for the particles potential equal to the Gravity GPU + // array for the potential + Particles.G.potential_dev = Grav.F.potential_d; + #endif + + if (strcmp(P->init, "Uniform") == 0) { + Initialize_Uniform_Particles(); + } + + #ifdef MPI_CHOLLA + MPI_Barrier(world); + #endif + chprintf("Particles Initialized Successfully. \n\n"); +} + +void Particles3D::Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, + Real ydglobal, Real zdglobal) +{ + // Initialize local and total number of particles to 0 + n_local = 0; + n_total = 0; n_total_initial = 0; - //Initialize the simulation time and delta_t to 0 + // Initialize the simulation time and delta_t to 0 dt = 0.0; - t = 0.0; - //Set the maximum delta_t for particles, this can be changed depending on the problem. + t = 0.0; + // Set the maximum delta_t for particles, this can be changed depending on the + // problem. 
max_dt = 10000; - //Courant CFL condition factor for particles + // Courant CFL condition factor for particles C_cfl = 0.3; #ifndef SINGLE_PARTICLE_MASS - particle_mass = 0; //The particle masses are stored in a separate array + particle_mass = 0; // The particle masses are stored in a separate array #endif #ifdef PARTICLES_CPU - //Vectors for positions, velocities and accelerations + // Vectors for positions, velocities and accelerations real_vector_t pos_x; real_vector_t pos_y; real_vector_t pos_z; @@ -78,32 +81,33 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, real_vector_t grav_y; real_vector_t grav_z; - #ifndef SINGLE_PARTICLE_MASS - //Vector for masses + #ifndef SINGLE_PARTICLE_MASS + // Vector for masses real_vector_t mass; - #endif - #ifdef PARTICLE_IDS - //Vector for particle IDs + #endif + #ifdef PARTICLE_IDS + // Vector for particle IDs int_vector_t partIDs; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE real_vector_t age; - #endif + #endif - #ifdef MPI_CHOLLA - //Vectors for the indices of the particles that need to be transferred via MPI + #ifdef MPI_CHOLLA + // Vectors for the indices of the particles that need to be transferred via + // MPI int_vector_t out_indxs_vec_x0; int_vector_t out_indxs_vec_x1; int_vector_t out_indxs_vec_y0; int_vector_t out_indxs_vec_y1; int_vector_t out_indxs_vec_z0; int_vector_t out_indxs_vec_z1; - #endif + #endif - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU - //Initialize Grid Values - //Local and total number of cells + // Initialize Grid Values + // Local and total number of cells G.nx_local = Grav.nx_local; G.ny_local = Grav.ny_local; G.nz_local = Grav.nz_local; @@ -111,38 +115,40 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, G.ny_total = Grav.ny_total; G.nz_total = Grav.nz_total; - //Uniform (dx, dy, dz) + // Uniform (dx, dy, dz) G.dx = Grav.dx; G.dy = Grav.dy; G.dz = Grav.dz; - //Left boundaries of the local domain + // Left 
boundaries of the local domain G.xMin = Grav.xMin; G.yMin = Grav.yMin; G.zMin = Grav.zMin; - //Right boundaries of the local domain + // Right boundaries of the local domain G.xMax = Grav.xMax; G.yMax = Grav.yMax; G.zMax = Grav.zMax; - //Left boundaries of the global domain + // Left boundaries of the global domain G.domainMin_x = xbound; G.domainMin_y = ybound; G.domainMin_z = zbound; - //Right boundaries of the global domain + // Right boundaries of the global domain G.domainMax_x = xbound + xdglobal; G.domainMax_y = ybound + ydglobal; G.domainMax_z = zbound + zdglobal; - //Number of ghost cells for the particles grid. For CIC one ghost cell is needed + // Number of ghost cells for the particles grid. For CIC one ghost cell is + // needed G.n_ghost_particles_grid = 1; - //Number of cells for the particles grid including ghost cells - G.n_cells = (G.nx_local+2*G.n_ghost_particles_grid) * (G.ny_local+2*G.n_ghost_particles_grid) * (G.nz_local+2*G.n_ghost_particles_grid); + // Number of cells for the particles grid including ghost cells + G.n_cells = (G.nx_local + 2 * G.n_ghost_particles_grid) * (G.ny_local + 2 * G.n_ghost_particles_grid) * + (G.nz_local + 2 * G.n_ghost_particles_grid); - //Set the boundary types + // Set the boundary types #ifdef MPI_CHOLLA G.boundary_type_x0 = P->xlg_bcnd; G.boundary_type_x1 = P->xug_bcnd; @@ -158,41 +164,49 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, G.boundary_type_z0 = P->zl_bcnd; G.boundary_type_z1 = P->zu_bcnd; #endif - + #ifdef PARTICLES_GPU - //Factor to allocate the particles data arrays on the GPU. - //When using MPI particles will be transferred to other GPU, for that reason we need extra memory allocated - #ifdef MPI_CHOLLA + // Factor to allocate the particles data arrays on the GPU. 
+ // When using MPI particles will be transferred to other GPU, for that + // reason we need extra memory allocated + #ifdef MPI_CHOLLA G.gpu_allocation_factor = 1.25; - #else + #else G.gpu_allocation_factor = 1.0; - #endif - - G.size_blocks_array = 1024*128; - G.n_cells_potential = ( G.nx_local + 2*N_GHOST_POTENTIAL ) * ( G.ny_local + 2*N_GHOST_POTENTIAL ) * ( G.nz_local + 2*N_GHOST_POTENTIAL ); + #endif - #ifdef SINGLE_PARTICLE_MASS - mass_dev = NULL; //This array won't be used - #endif + G.size_blocks_array = 1024 * 128; + G.n_cells_potential = (G.nx_local + 2 * N_GHOST_POTENTIAL) * (G.ny_local + 2 * N_GHOST_POTENTIAL) * + (G.nz_local + 2 * N_GHOST_POTENTIAL); + #ifdef SINGLE_PARTICLE_MASS + mass_dev = NULL; // This array won't be used + #endif - #endif //PARTICLES_GPU + #endif // PARTICLES_GPU // Flags for Initial and tranfer the particles and density - INITIAL = true; - TRANSFER_DENSITY_BOUNDARIES = false; + INITIAL = true; + TRANSFER_DENSITY_BOUNDARIES = false; TRANSFER_PARTICLES_BOUNDARIES = false; Allocate_Memory(); - //Initialize the particles density and gravitational field to 0. + // Initialize the particles density and gravitational field to 0. 
Initialize_Grid_Values(); // Initialize Particles - if (strcmp(P->init, "Spherical_Overdensity_3D")==0) Initialize_Sphere(P); - else if (strcmp(P->init, "Zeldovich_Pancake")==0) Initialize_Zeldovich_Pancake( P ); - else if (strcmp(P->init, "Read_Grid")==0) Load_Particles_Data( P ); - else if (strcmp(P->init, "Disk_3D_particles") == 0) Initialize_Disk_Stellar_Clusters(P); + if (strcmp(P->init, "Spherical_Overdensity_3D") == 0) { + Initialize_Sphere(P); + } else if (strcmp(P->init, "Zeldovich_Pancake") == 0) { + Initialize_Zeldovich_Pancake(P); + } else if (strcmp(P->init, "Read_Grid") == 0) { + Load_Particles_Data(P); + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) + } else if (strcmp(P->init, "Disk_3D_particles") == 0) { + Initialize_Disk_Stellar_Clusters(P); + #endif + } #ifdef MPI_CHOLLA n_total_initial = ReducePartIntSum(n_local); @@ -200,20 +214,24 @@ void Particles_3D::Initialize( struct parameters *P, Grav3D &Grav, Real xbound, n_total_initial = n_local; #endif - chprintf("Particles Initialized: \n n_local: %lu \n", n_local ); - chprintf(" n_total: %lu \n", n_total_initial ); - chprintf(" xDomain_local: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax ); - chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.domainMin_x, G.domainMax_x, G.domainMin_y, G.domainMax_y, G.domainMin_z, G.domainMax_z); - chprintf(" dx: %f %f %f\n", G.dx, G.dy, G.dz ); + chprintf("Particles Initialized: \n n_local: %lu \n", n_local); + chprintf(" n_total: %lu \n", n_total_initial); + chprintf(" xDomain_local: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.xMin, G.xMax, G.yMin, G.yMax, G.zMin, G.zMax); + chprintf(" xDomain_global: [%.4f %.4f ] [%.4f %.4f ] [%.4f %.4f ]\n", G.domainMin_x, G.domainMax_x, G.domainMin_y, + G.domainMax_y, G.domainMin_z, G.domainMax_z); + chprintf(" dx: %f %f %f\n", G.dx, G.dy, G.dz); #ifdef PARTICLE_IDS chprintf(" Tracking particle IDs\n"); #endif #if 
defined(MPI_CHOLLA) && defined(PRINT_DOMAIN) - for (int n=0; nprng_seed); - std::uniform_real_distribution xPositionPrng(G.xMin, G.xMax ); - std::uniform_real_distribution yPositionPrng(G.yMin, G.yMax ); - std::uniform_real_distribution zPositionPrng(G.zMin, G.zMax ); - while ( pID < n_particles_local ){ + std::uniform_real_distribution xPositionPrng(G.xMin, G.xMax); + std::uniform_real_distribution yPositionPrng(G.yMin, G.yMax); + std::uniform_real_distribution zPositionPrng(G.zMin, G.zMax); + while (pID < n_particles_local) { pPos_x = xPositionPrng(generator); pPos_y = yPositionPrng(generator); pPos_z = zPositionPrng(generator); - r = sqrt( (pPos_x-center_x)*(pPos_x-center_x) + (pPos_y-center_y)*(pPos_y-center_y) + (pPos_z-center_z)*(pPos_z-center_z) ); - if ( r > sphereR ) continue; + r = sqrt((pPos_x - center_x) * (pPos_x - center_x) + (pPos_y - center_y) * (pPos_y - center_y) + + (pPos_z - center_z) * (pPos_z - center_z)); + if (r > sphereR) { + continue; + } - #ifdef PARTICLES_CPU - //Copy the particle data to the particles vectors - pos_x.push_back( pPos_x ); - pos_y.push_back( pPos_y ); - pos_z.push_back( pPos_z); - vel_x.push_back( 0.0 ); - vel_y.push_back( 0.0 ); - vel_z.push_back( 0.0 ); - grav_x.push_back( 0.0 ); - grav_y.push_back( 0.0 ); - grav_z.push_back( 0.0 ); + #ifdef PARTICLES_CPU + // Copy the particle data to the particles vectors + pos_x.push_back(pPos_x); + pos_y.push_back(pPos_y); + pos_z.push_back(pPos_z); + vel_x.push_back(0.0); + vel_y.push_back(0.0); + vel_z.push_back(0.0); + grav_x.push_back(0.0); + grav_y.push_back(0.0); + grav_z.push_back(0.0); #ifdef PARTICLE_IDS - partIDs.push_back( pID ); + partIDs.push_back(pID); #endif #ifndef SINGLE_PARTICLE_MASS - mass.push_back( Mparticle ); + mass.push_back(Mparticle); #endif - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU // Copy the particle data to the temporal Host Buffers - temp_pos_x[pID] = pPos_x; - temp_pos_y[pID] = pPos_y; - 
temp_pos_z[pID] = pPos_z; - temp_vel_x[pID] = 0.0; - temp_vel_y[pID] = 0.0; - temp_vel_z[pID] = 0.0; + temp_pos_x[pID] = pPos_x; + temp_pos_y[pID] = pPos_y; + temp_pos_z[pID] = pPos_z; + temp_vel_x[pID] = 0.0; + temp_vel_y[pID] = 0.0; + temp_vel_z[pID] = 0.0; #ifndef SINGLE_PARTICLE_MASS - temp_mass[pID] = Mparticle; + temp_mass[pID] = Mparticle; + #endif + #ifdef PARTICLE_IDS + temp_id[pID] = pID; #endif - #endif //PARTICLES_GPU + #endif // PARTICLES_GPU pID += 1; } #ifdef PARTICLES_CPU n_local = pos_x.size(); - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU #if defined(PARTICLE_IDS) && defined(MPI_CHOLLA) - // Get global IDs: Offset the local IDs to get unique global IDs across the MPI ranks - chprintf( " Computing Global Particles IDs offset \n" ); + // Get global IDs: Offset the local IDs to get unique global IDs across the + // MPI ranks + chprintf(" Computing Global Particles IDs offset \n"); part_int_t global_id_offset; - global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); - #ifdef PARTICLES_CPU - for ( int p_indx=0; p_indxprng_seed); - std::gamma_distribution radialDist(2,1); //for generating cyclindrical radii - std::uniform_real_distribution zDist(0, 1); //for generating height above/below the disk. - std::uniform_real_distribution phiDist(0, 2*M_PI); //for generating phi - std::normal_distribution speedDist(0, 1); //for generating random speeds. 
- - Real M_d = Galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) - Real R_d = Galaxies::MW.getR_d(); // MW stellar disk scale length in kpc - Real Z_d = Galaxies::MW.getZ_d(); // MW stellar height scale length in kpc - Real R_max = sqrt(P->xlen*P->xlen + P->ylen*P->ylen)/2; - R_max = P->xlen / 2.0; + std::gamma_distribution radialDist(2, 1); // for generating cyclindrical radii + std::uniform_real_distribution zDist(-0.005, 0.005); + std::uniform_real_distribution vzDist(-1e-8, 1e-8); + std::uniform_real_distribution phiDist(0, + 2 * M_PI); // for generating phi + std::normal_distribution speedDist(0, + 1); // for generating random speeds. + + Real M_d = galaxies::MW.getM_d(); // MW disk mass in M_sun (assumed to be all in stars) + Real R_d = galaxies::MW.getR_d(); // MW stellar disk scale length in kpc + Real Z_d = galaxies::MW.getZ_d(); // MW stellar height scale length in kpc + Real R_max = sqrt(P->xlen * P->xlen + P->ylen * P->ylen) / 2; + R_max = P->xlen / 2.0; + + real_vector_t temp_pos_x; + real_vector_t temp_pos_y; + real_vector_t temp_pos_z; + real_vector_t temp_vel_x; + real_vector_t temp_vel_y; + real_vector_t temp_vel_z; + real_vector_t temp_grav_x; + real_vector_t temp_grav_y; + real_vector_t temp_grav_z; + real_vector_t temp_mass; + int_vector_t temp_ids; + real_vector_t temp_age; Real x, y, z, R, phi; Real vx, vy, vz, vel, ac; Real expFactor, vR_rms, vR, vPhi_str, vPhi, v_c2, vPhi_rand_rms, kappa2; - particle_mass = 1e4; //solar masses - //unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc radius - unsigned long int N = (long int)(6.5e6 * 0.9272485558395908); // 15kpc radius - long lost_particles = 0; - for ( unsigned long int i = 0; i < N; i++ ){ - do { - R = R_d*radialDist(generator); - } while (R > R_max); - - phi = phiDist(generator); - x = R * cos(phi); - y = R * sin(phi); - z = 0; - - if (x < G.xMin || x >= G.xMax) continue; - if (y < G.yMin || y >= G.yMax) continue; - if (z < G.zMin || z >= G.zMax) 
continue; - - ac = fabs(Galaxies::MW.gr_disk_D3D(R, 0) + Galaxies::MW.gr_halo_D3D(R, 0)); - vPhi = sqrt(R*ac); - - vx = -vPhi*sin(phi); - vy = vPhi*cos(phi); - vz = 0; - - #ifdef PARTICLES_CPU - //Copy the particle data to the particles vectors - pos_x.push_back(x); - pos_y.push_back(y); - pos_z.push_back(z); - vel_x.push_back(vx); - vel_y.push_back(vy); - vel_z.push_back(vz); - grav_x.push_back(0.0); - grav_y.push_back(0.0); - grav_z.push_back(0.0); - - #ifdef PARTICLE_IDS - partIDs.push_back(i); - #endif //PARTICLE_IDS - - #ifdef PARTICLE_AGE - //if (fabs(z) >= Z_d) age.push_back(1.1e4); - //else age.push_back(0.0); - age.push_back(0.0); - #endif - - #endif//PARTICLES_CPU - } + // unsigned long int N = (long int)(6.5e6 * 0.11258580827352116); //2kpc + // radius unsigned long int N = 13; //(long int)(6.5e6 * 0.9272485558395908); + // // 15kpc radius + Real total_mass = 0; + Real upper_limit_cluster_mass = 1e7; + long lost_particles = 0; + part_int_t id = -1; + while (total_mass < upper_limit_cluster_mass) { + Real cluster_mass = galaxies::MW.singleClusterMass(generator); + total_mass += cluster_mass; + id += 1; // do this here before we check whether the particle is in the MPI + // domain, otherwise could end up with duplicated IDs + do { + R = R_d * radialDist(generator); + } while (R > R_max); + + phi = phiDist(generator); + x = R * cos(phi); + y = R * sin(phi); + z = zDist(generator); + + if (x < G.xMin || x >= G.xMax) { + continue; + } + if (y < G.yMin || y >= G.yMax) { + continue; + } + if (z < G.zMin || z >= G.zMax) { + continue; + } - #ifdef PARTICLES_CPU - n_local = pos_x.size(); - #endif + ac = fabs(galaxies::MW.gr_disk_D3D(R, 0) + galaxies::MW.gr_halo_D3D(R, 0)); + vPhi = sqrt(R * ac); + + vx = -vPhi * sin(phi); + vy = vPhi * cos(phi); + vz = 0.0; // vzDist(generator); + + // add particle data to the particles vectors + temp_pos_x.push_back(x); + temp_pos_y.push_back(y); + temp_pos_z.push_back(z); + temp_vel_x.push_back(vx); + temp_vel_y.push_back(vy); + 
temp_vel_z.push_back(vz); + temp_grav_x.push_back(0.0); + temp_grav_y.push_back(0.0); + temp_grav_z.push_back(0.0); + temp_mass.push_back(cluster_mass); + temp_age.push_back(0.0); + temp_ids.push_back(id); + } - if (lost_particles > 0) chprintf(" lost %lu particles\n", lost_particles); - chprintf( " Stellar Disk Particles Initialized, n_local: %lu\n", n_local); -} + n_local = temp_pos_x.size(); + + /* + part_int_t global_id_offset = 0; + #ifdef MPI_CHOLLA + // Get global IDs: Offset the local IDs to get unique global IDs across + the MPI ranks chprintf( " Computing Global Particles IDs offset \n" ); + global_id_offset = Get_Particles_IDs_Global_MPI_Offset( n_local ); + #endif //MPI_CHOLLA + for ( int i=0; i 0) { + chprintf(" lost %lu particles\n", lost_particles); + } + chprintf( + "Stellar Disk Particles Initialized, n_total: %lu, n_local: %lu, " + "total_mass: %.3e s.m.\n", + id + 1, n_local, total_mass); +} + #endif - //No partidcles for the Zeldovich Pancake problem. n_local=0 +void Particles3D::Initialize_Zeldovich_Pancake(struct Parameters *P) +{ + // No particles for the Zeldovich Pancake problem. 
n_local=0 chprintf("Setting Zeldovich Pancake initial conditions...\n"); // n_local = pos_x.size(); n_local = 0; - chprintf( " Particles Zeldovich Pancake Initialized, n_local: %lu\n", n_local); - + chprintf(" Particles Zeldovich Pancake Initialized, n_local: %lu\n", n_local); } - -void Grid3D::Initialize_Uniform_Particles(){ - //Initialize positions assigning one particle at each cell in a uniform grid +void Grid3D::Initialize_Uniform_Particles() +{ + // Initialize positions assigning one particle at each cell in a uniform grid int i, j, k, id; Real x_pos, y_pos, z_pos; Real dVol, Mparticle; - dVol = H.dx * H.dy * H.dz; + dVol = H.dx * H.dy * H.dz; Mparticle = dVol; #ifdef SINGLE_PARTICLE_MASS @@ -717,31 +823,31 @@ void Grid3D::Initialize_Uniform_Particles(){ #endif part_int_t pID = 0; - for (k=H.n_ghost; k -#include -#include -#include -#include -#include "../global/global.h" -#include "../gravity/grav3D.h" + #include + #include + #include + #include -#ifdef PARTICLES_GPU -#define TPB_PARTICLES 1024 -// #define PRINT_GPU_MEMORY -#define PRINT_MAX_MEMORY_USAGE -#endif + #include + #include "../global/global.h" + #include "../gravity/grav3D.h" + #ifdef PARTICLES_GPU + #define TPB_PARTICLES 1024 + // #define PRINT_GPU_MEMORY + #define PRINT_MAX_MEMORY_USAGE + #endif /*! \class Part3D * \brief Class to create a set of particles in 3D space. 
*/ -class Particles_3D +class Particles3D { - public: - + public: part_int_t n_local; part_int_t n_total; @@ -40,22 +39,21 @@ class Particles_3D Real particle_mass; - #ifdef COSMOLOGY + #ifdef COSMOLOGY Real current_z; Real current_a; - #endif - + #endif - #ifdef PARTICLES_CPU - #ifdef PARTICLE_IDS + #ifdef PARTICLES_CPU + #ifdef PARTICLE_IDS int_vector_t partIDs; - #endif - #ifndef SINGLE_PARTICLE_MASS + #endif + #ifndef SINGLE_PARTICLE_MASS real_vector_t mass; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE real_vector_t age; - #endif + #endif real_vector_t pos_x; real_vector_t pos_y; real_vector_t pos_z; @@ -65,13 +63,16 @@ class Particles_3D real_vector_t grav_x; real_vector_t grav_y; real_vector_t grav_z; - #endif //PARTICLES_CPU + #endif // PARTICLES_CPU - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU part_int_t particles_array_size; - #ifdef PARTICLE_IDS + #ifdef PARTICLE_IDS part_int_t *partIDs_dev; - #endif + #endif + #ifdef PARTICLE_AGE + Real *age_dev; + #endif Real *mass_dev; Real *pos_x_dev; Real *pos_y_dev; @@ -83,11 +84,9 @@ class Particles_3D Real *grav_y_dev; Real *grav_z_dev; + #endif // PARTICLES_GPU - #endif //PARTICLES_GPU - - - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA part_int_t n_transfer_x0; part_int_t n_transfer_x1; @@ -117,26 +116,21 @@ class Particles_3D part_int_t n_in_buffer_z0; part_int_t n_in_buffer_z1; - - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU int_vector_t out_indxs_vec_x0; int_vector_t out_indxs_vec_x1; int_vector_t out_indxs_vec_y0; int_vector_t out_indxs_vec_y1; int_vector_t out_indxs_vec_z0; int_vector_t out_indxs_vec_z1; - #endif //PARTICLES_CPU - + #endif // PARTICLES_CPU - #endif //MPI_CHOLLA + #endif // MPI_CHOLLA bool TRANSFER_DENSITY_BOUNDARIES; bool TRANSFER_PARTICLES_BOUNDARIES; - - struct Grid - { - + struct Grid { int nx_local, ny_local, nz_local; int nx_total, ny_total, nz_total; @@ -165,11 +159,10 @@ class Particles_3D Real *gravity_x; Real *gravity_y; Real *gravity_z; - #ifdef GRAVITY_GPU + #ifdef 
GRAVITY_GPU Real *density_dev; + #endif #endif - #endif - #ifdef PARTICLES_GPU Real *density_dev; @@ -180,7 +173,7 @@ class Particles_3D Real *dti_array_dev; Real *dti_array_host; - #ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA bool *transfer_particles_flags_d; int *transfer_particles_indices_d; int *replace_particles_indices_d; @@ -215,72 +208,104 @@ class Particles_3D Real *recv_buffer_z0_d; Real *recv_buffer_z1_d; - #endif // MPI_CHOLLA - - #endif //PARTICLES_GPU + #endif // MPI_CHOLLA + #endif // PARTICLES_GPU } G; - Particles_3D(void); + Particles3D(void); - void Initialize( struct parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, Real ydglobal, Real zdglobal ); + void Initialize(struct Parameters *P, Grav3D &Grav, Real xbound, Real ybound, Real zbound, Real xdglobal, + Real ydglobal, Real zdglobal); - void Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ); - void Free_GPU_Array_Real( Real *array ); - - #ifdef PARTICLES_GPU + void Allocate_Particles_Grid_Field_Real(Real **array_dev, int size); + void Free_GPU_Array_Real(Real *array); + + #ifdef PARTICLES_GPU - void Free_GPU_Array_int( int *array ); - void Free_GPU_Array_bool( bool *array ); + void Free_GPU_Array_int(int *array); + void Free_GPU_Array_bool(bool *array); + template + void Free_GPU_Array(T *array) + { + cudaFree(array); + } // TODO remove the Free_GPU_Array_ functions void Allocate_Memory_GPU(); - void Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ); - void Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ); - void Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ); - void Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size); - void Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size); - void Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_Real(Real 
**array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size); + void Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size); + void Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size); + void Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size); + void Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, part_int_t size); + void Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, part_int_t size); + void Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size); void Free_Memory_GPU(); void Initialize_Grid_Values_GPU(); void Get_Density_CIC_GPU(); - void Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, Real *density_dev, Real *pos_x_dev, Real *pos_y_dev , Real *pos_z_dev, Real *mass_dev); + void Get_Density_CIC_GPU_function(part_int_t n_local, Real particle_mass, Real xMin, Real xMax, Real yMin, Real yMax, + Real zMin, Real zMax, Real dx, Real dy, Real dz, int nx_local, int ny_local, + int nz_local, int n_ghost_particles_grid, int n_cells, Real *density_h, + Real *density_dev, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, + Real *mass_dev); void Clear_Density_GPU(); - void Clear_Density_GPU_function( Real *density_dev, int n_cells); - void Copy_Potential_To_GPU( Real *potential_host, Real *potential_dev, int n_cells_potential ); - void Get_Gravity_Field_Particles_GPU( Real *potential_host ); - void Get_Gravity_Field_Particles_GPU_function( int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, int n_cells_potential, Real dx, Real dy, 
Real dz, Real *potential_host, Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ); + void Clear_Density_GPU_function(Real *density_dev, int n_cells); + void Copy_Potential_To_GPU(Real *potential_host, Real *potential_dev, int n_cells_potential); + void Get_Gravity_Field_Particles_GPU(Real *potential_host); + void Get_Gravity_Field_Particles_GPU_function(int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, + int n_cells_potential, Real dx, Real dy, Real dz, Real *potential_host, + Real *potential_dev, Real *gravity_x_dev, Real *gravity_y_dev, + Real *gravity_z_dev); void Get_Gravity_CIC_GPU(); - void Get_Gravity_CIC_GPU_function( part_int_t n_local, int nx_local, int ny_local, int nz_local, int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev ); - Real Calc_Particles_dt_GPU_function( int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev ); - void Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ); - void Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ); - void Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ); - void 
Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ); - part_int_t Compute_Particles_GPU_Array_Size( part_int_t n ); - int Select_Particles_to_Transfer_GPU( int direction, int side ); - void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer, int buffer_length ); - void Replace_Tranfered_Particles_GPU( int n_transfer ); - void Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ); - void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ); - #ifdef PRINT_MAX_MEMORY_USAGE + void Get_Gravity_CIC_GPU_function(part_int_t n_local, int nx_local, int ny_local, int nz_local, + int n_ghost_particles_grid, Real xMin, Real xMax, Real yMin, Real yMax, Real zMin, + Real zMax, Real dx, Real dy, Real dz, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, + Real *gravity_x_dev, Real *gravity_y_dev, Real *gravity_z_dev); + Real Calc_Particles_dt_GPU_function(int ngrid, part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *dti_array_host, Real *dti_array_dev); + void Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); + void Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, + Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K); + void 
Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev); + void Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, + Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K); + part_int_t Compute_Particles_GPU_Array_Size(part_int_t n); + int Select_Particles_to_Transfer_GPU(int direction, int side); + void Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer, + int buffer_length); + void Replace_Tranfered_Particles_GPU(int n_transfer); + void Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv); + void Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d); + void Set_Particles_Open_Boundary_GPU(int dir, int side); + #ifdef PRINT_MAX_MEMORY_USAGE void Print_Max_Memory_Usage(); - #endif - - #endif //PARTICLES_GPU - + #endif + #endif // PARTICLES_GPU void Allocate_Memory(); void Initialize_Grid_Values(); - void Initialize_Sphere(struct parameters *P); + void Initialize_Sphere(struct Parameters *P); - void Initialize_Disk_Stellar_Clusters(struct parameters *P); + #if defined(PARTICLE_AGE) && !defined(SINGLE_PARTICLE_MASS) && defined(PARTICLE_IDS) + void Initialize_Disk_Stellar_Clusters(struct Parameters *P); + #endif - void Initialize_Zeldovich_Pancake( struct parameters *P ); + void Initialize_Zeldovich_Pancake(struct Parameters *P); - void Load_Particles_Data( struct parameters *P ); + void Load_Particles_Data(struct Parameters *P); void Free_Memory(); @@ -288,44 +313,44 @@ class Particles_3D void Clear_Density(); - void Get_Density_CIC_Serial( ); + void Get_Density_CIC_Serial(); - #ifdef HDF5 - void Load_Particles_Data_HDF5( hid_t file_id, int nfile, struct parameters *P ); - #endif + 
#ifdef HDF5 + void Load_Particles_Data_HDF5(hid_t file_id, int nfile, struct Parameters *P); + #endif - #ifdef PARALLEL_OMP - void Get_Density_CIC_OMP( ); - #endif + #ifdef PARALLEL_OMP + void Get_Density_CIC_OMP(); + #endif void Get_Density_CIC(); - #ifdef MPI_CHOLLA - void Clear_Particles_For_Transfer( void ); - void Select_Particles_to_Transfer_All( int *flags ); - void Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge, + #ifdef MPI_CHOLLA + void Clear_Particles_For_Transfer(void); + void Select_Particles_to_Transfer_All(int *flags); + void Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z); void Remove_Transfered_Particles(); - #ifdef PARTICLES_CPU - void Clear_Vectors_For_Transfers( void ); - void Add_Particle_To_Vectors( Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z, int *flags ); - void Select_Particles_to_Transfer_All_CPU( int *flags ); - void Load_Particles_to_Buffer_CPU( int direction, int side, Real *send_buffer, int buffer_length ); - void Unload_Particles_from_Buffer_CPU( int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, int buffer_length_z0, int buffer_length_z1, int *flags); - #endif//PARTICLES_CPU - - - #ifdef PARTICLES_GPU + #ifdef PARTICLES_CPU + void Clear_Vectors_For_Transfers(void); + void Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, + Real pVel_y, Real pVel_z, int *flags); + void Select_Particles_to_Transfer_All_CPU(int *flags); + void Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length); + void Unload_Particles_from_Buffer_CPU(int 
direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags); + #endif // PARTICLES_CPU + + #ifdef PARTICLES_GPU void Allocate_Memory_GPU_MPI(); void ReAllocate_Memory_GPU_MPI(); - void Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer, int buffer_length ); - #endif //PARTICLES_GPU - #endif - + void Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer, int buffer_length); + #endif // PARTICLES_GPU + #endif }; - -#endif //PARTICLES_H -#endif //PARTICLES + #endif // PARTICLES_H +#endif // PARTICLES diff --git a/src/particles/particles_3D_gpu.cu b/src/particles/particles_3D_gpu.cu index 6ce4bec0c..d72c9bc81 100644 --- a/src/particles/particles_3D_gpu.cu +++ b/src/particles/particles_3D_gpu.cu @@ -1,173 +1,199 @@ -#if defined(PARTICLES) +#if defined(PARTICLES) -#include -#include -#include -#include -#include "../utils/gpu.hpp" -#include "../io/io.h" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../particles/particles_3D.h" + #include + #include + #include + #include + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" +void Particles3D::Free_GPU_Array_Real(Real *array) { cudaFree(array); } - - -void Particles_3D::Free_GPU_Array_Real( Real *array ){ cudaFree(array); } - - -void Particles_3D::Allocate_Particles_Grid_Field_Real( Real **array_dev, int size ){ +void Particles3D::Allocate_Particles_Grid_Field_Real(Real **array_dev, int size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); + 
chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); #endif - if ( global_free < size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(Real)/1000000 ); + if (global_free < size * sizeof(Real)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(Real) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); cudaDeviceSynchronize(); } + #ifdef PARTICLES_GPU + #ifdef PRINT_MAX_MEMORY_USAGE + #include "../mpi/mpi_routines.h" -#ifdef PARTICLES_GPU - -#ifdef PRINT_MAX_MEMORY_USAGE -#include "../mpi/mpi_routines.h" - -void Particles_3D::Print_Max_Memory_Usage(){ - +void Particles3D::Print_Max_Memory_Usage() +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); - + part_int_t n_local_max, n_total, mem_usage; Real fraction_max, global_free_min; - - n_local_max = (part_int_t) ReduceRealMax( (Real) n_local ); - n_total = ReducePartIntSum( n_local ); - fraction_max = (Real) n_local_max / (Real) n_total; - mem_usage = n_local_max * 9 * sizeof(Real); //Usage for pos, vel ans accel. 
- - global_free_min = ReduceRealMin( (Real) global_free ); - - chprintf( " Particles GPU Memory: N_local_max: %ld (%.1f %) mem_usage: %ld MB global_free_min: %.1f MB \n", n_local_max, fraction_max*100, mem_usage/1000000, global_free_min/1000000 ); - - -} -#endif + n_local_max = (part_int_t)ReduceRealMax((Real)n_local); + n_total = ReducePartIntSum(n_local); + fraction_max = (Real)n_local_max / (Real)n_total; + mem_usage = n_local_max * 9 * sizeof(Real); // Usage for pos, vel ans accel. + global_free_min = ReduceRealMin((Real)global_free); + chprintf( + " Particles GPU Memory: N_local_max: %ld (%.1f %) mem_usage: %ld MB " + " global_free_min: %.1f MB \n", + n_local_max, fraction_max * 100, mem_usage / 1000000, global_free_min / 1000000); +} -void Particles_3D::Free_GPU_Array_int( int *array ) { cudaFree(array); } -void Particles_3D::Free_GPU_Array_bool( bool *array ){ cudaFree(array); } + #endif +void Particles3D::Free_GPU_Array_int(int *array) { cudaFree(array); } +void Particles3D::Free_GPU_Array_bool(bool *array) { cudaFree(array); } -void __global__ Copy_Device_to_Device_Kernel( Real *src_array_dev, Real *dst_array_dev, part_int_t size ){ - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid < size ) dst_array_dev[tid] = src_array_dev[tid]; +template +void __global__ Copy_Device_to_Device_Kernel(T *src_array_dev, T *dst_array_dev, part_int_t size) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < size) { + dst_array_dev[tid] = src_array_dev[tid]; + } } -void Copy_Device_to_Device( Real *src_array_dev, Real *dst_array_dev, part_int_t size ){ - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; +template +void Copy_Device_to_Device(T *src_array_dev, T *dst_array_dev, part_int_t size) +{ + int ngrid = (size - 1) / TPB_PARTICLES + 1; dim3 dim1dGrid(ngrid, 1, 1); dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); - CudaCheckError(); - 
+ hipLaunchKernelGGL(Copy_Device_to_Device_Kernel, dim1dGrid, dim1dBlock, 0, 0, src_array_dev, dst_array_dev, size); + GPU_Error_Check(); } - -void Particles_3D::Allocate_Particles_GPU_Array_Real( Real **array_dev, part_int_t size ){ +void Particles3D::Allocate_Particles_GPU_Array_Real(Real **array_dev, part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(Real)/1000000 ); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(Real)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(Real) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(Real))); cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_int( int **array_dev, part_int_t size ){ +void Particles3D::Allocate_Particles_GPU_Array_int(int **array_dev, part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(int) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(int)/1000000 ); + 
GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(int)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(int) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(int)) ); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(int))); cudaDeviceSynchronize(); } -void Particles_3D::Allocate_Particles_GPU_Array_bool( bool **array_dev, part_int_t size ){ +void Particles3D::Allocate_Particles_GPU_Array_Part_Int(part_int_t **array_dev, part_int_t size) +{ size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); - #ifdef PRINT_GPU_MEMORY - chprintf( "Allocating GPU Memory: %ld MB free \n", global_free/1000000); - #endif - if ( global_free < size*sizeof(bool) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %ld MB \n", global_free/1000000 ); - printf( " Requested Memory: %ld MB \n", size*sizeof(bool)/1000000 ); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(part_int_t)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(part_int_t) / 1000000); exit(-1); } - CudaSafeCall( cudaMalloc((void**)array_dev, size*sizeof(bool)) ); + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(part_int_t))); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Host_to_Device( Real *array_host, Real *array_dev, part_int_t size){ - CudaSafeCall( cudaMemcpy(array_dev, array_host, 
size*sizeof(Real), cudaMemcpyHostToDevice) ); +void Particles3D::Allocate_Particles_GPU_Array_bool(bool **array_dev, part_int_t size) +{ + size_t global_free, global_total; + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); + #ifdef PRINT_GPU_MEMORY + chprintf("Allocating GPU Memory: %ld MB free \n", global_free / 1000000); + #endif + if (global_free < size * sizeof(bool)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", size * sizeof(bool) / 1000000); + exit(-1); + } + GPU_Error_Check(cudaMalloc((void **)array_dev, size * sizeof(bool))); cudaDeviceSynchronize(); } -void Particles_3D::Copy_Particles_Array_Real_Device_to_Host( Real *array_dev, Real *array_host, part_int_t size){ - CudaSafeCall( cudaMemcpy(array_host, array_dev, size*sizeof(Real), cudaMemcpyDeviceToHost) ); +void Particles3D::Copy_Particles_Array_Real_Host_to_Device(Real *array_host, Real *array_dev, part_int_t size) +{ + GPU_Error_Check(cudaMemcpy(array_dev, array_host, size * sizeof(Real), cudaMemcpyHostToDevice)); cudaDeviceSynchronize(); } - - -__global__ void Set_Particles_Array_Real_Kernel( Real value, Real *array_dev, part_int_t size ){ - int tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid < size ) array_dev[tid] = value; +void Particles3D::Copy_Particles_Array_Real_Device_to_Host(Real *array_dev, Real *array_host, part_int_t size) +{ + GPU_Error_Check(cudaMemcpy(array_host, array_dev, size * sizeof(Real), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); } +void Particles3D::Copy_Particles_Array_Int_Host_to_Device(part_int_t *array_host, part_int_t *array_dev, + part_int_t size) +{ + GPU_Error_Check(cudaMemcpy(array_dev, array_host, size * sizeof(part_int_t), cudaMemcpyHostToDevice)); + cudaDeviceSynchronize(); +} +void Particles3D::Copy_Particles_Array_Int_Device_to_Host(part_int_t *array_dev, part_int_t *array_host, + part_int_t size) +{ + 
GPU_Error_Check(cudaMemcpy(array_host, array_dev, size * sizeof(part_int_t), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); +} -void Particles_3D::Set_Particles_Array_Real( Real value, Real *array_dev, part_int_t size){ +__global__ void Set_Particles_Array_Real_Kernel(Real value, Real *array_dev, part_int_t size) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < size) { + array_dev[tid] = value; + } +} +void Particles3D::Set_Particles_Array_Real(Real value, Real *array_dev, part_int_t size) +{ // set values for GPU kernels - int ngrid = (size + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (size - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0, value, array_dev, size); - CudaCheckError(); + hipLaunchKernelGGL(Set_Particles_Array_Real_Kernel, dim1dGrid, dim1dBlock, 0, 0, value, array_dev, size); + GPU_Error_Check(); } - - - - - - -#endif //PARTICLES_GPU -#endif//PARTICLES + #endif // PARTICLES_GPU +#endif // PARTICLES diff --git a/src/particles/particles_boundaries.cpp b/src/particles/particles_boundaries.cpp index b03b6038d..96e4f110e 100644 --- a/src/particles/particles_boundaries.cpp +++ b/src/particles/particles_boundaries.cpp @@ -1,26 +1,27 @@ #ifdef PARTICLES -#include -#include -#include -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "../particles/particles_3D.h" + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#ifdef PARTICLES_GPU -#include "../particles/particles_boundaries_gpu.h" -#include "../utils/gpu_arrays_functions.h" -#endif//PARTICLES_GPU -#endif//MPI_CHOLLA + #include + #include + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" -//Transfer the particles that moved outside the local domain -void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){ - + #ifdef 
MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #ifdef PARTICLES_GPU + #include "../utils/gpu_arrays_functions.h" + #include "particles_boundaries_gpu.h" + #endif // PARTICLES_GPU + #endif // MPI_CHOLLA - //Transfer Particles Boundaries +// Transfer the particles that moved outside the local domain +void Grid3D::Transfer_Particles_Boundaries(struct Parameters P) +{ + GPU_Error_Check(); + // Transfer Particles Boundaries Particles.TRANSFER_PARTICLES_BOUNDARIES = true; #ifdef CPU_TIME Timer.Part_Boundaries.Start(); @@ -30,572 +31,636 @@ void Grid3D::Transfer_Particles_Boundaries( struct parameters P ){ Timer.Part_Boundaries.End(); #endif Particles.TRANSFER_PARTICLES_BOUNDARIES = false; - + GPU_Error_Check(); } -#ifdef MPI_CHOLLA -//Remove the particles that were transferred outside the local domain -void Grid3D::Finish_Particles_Transfer( void ){ - - #ifdef PARTICLES_CPU + #ifdef MPI_CHOLLA +// Remove the particles that were transferred outside the local domain +void Grid3D::Finish_Particles_Transfer(void) +{ + #ifdef PARTICLES_CPU Particles.Remove_Transfered_Particles(); - #endif - + #endif } - -//Wait for the MPI request and unload the transferred particles +// Wait for the MPI request and unload the transferred particles void Grid3D::Wait_and_Unload_MPI_Comm_Particles_Buffers_BLOCK(int dir, int *flags) { - int iwait; - int index = 0; - int wait_max=0; + int index = 0; + int wait_max = 0; MPI_Status status; - - //find out how many recvs we need to wait for - if (dir==0) { - if(flags[0] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[1] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm + // find out how many recvs we need to wait for + if (dir == 0) { + if (flags[0] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[1] == 5) { // there is communication on this face + wait_max++; // so we'll need to 
wait for its comm + } } - if (dir==1) { - if(flags[2] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[3] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm + if (dir == 1) { + if (flags[2] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[3] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } - if (dir==2) { - if(flags[4] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm - if(flags[5] == 5) //there is communication on this face - wait_max++; //so we'll need to wait for its comm + if (dir == 2) { + if (flags[4] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } + if (flags[5] == 5) { // there is communication on this face + wait_max++; // so we'll need to wait for its comm + } } - //wait for any receives to complete - for(iwait=0;iwait Particles.G.recv_buffer_size_x0 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_x0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_x0) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_x0_particles, Particles.G.recv_buffer_size_x0, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_x0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_x0_particles , &buffer_length_particles_x0_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, &buffer_length_particles_x0_recv, 
buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_x0_particles , &buffer_length_particles_x0_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_x0_particles, &buffer_length_particles_x0_recv, buffer_length); #endif - // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " << Particles.n_recv_x0 << std::endl; - MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_x0 > 0 ) std::cout << " Recv X0: " << + // Particles.n_recv_x0 << std::endl; + MPI_Irecv(recv_buffer_x0_particles, buffer_length, MPI_CHREAL, source[0], 0, world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 1){ + if (index == 1) { buffer_length = Particles.n_recv_x1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_x1 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_x1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_x1) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_x1_particles, Particles.G.recv_buffer_size_x1, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_x1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_x1_particles , &buffer_length_particles_x1_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, &buffer_length_particles_x1_recv, buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_x1_particles , 
&buffer_length_particles_x1_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_x1_particles, &buffer_length_particles_x1_recv, buffer_length); #endif - // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout << " Recv X1: " << Particles.n_recv_x1 << " " << procID << " from " << source[1] << std::endl; - MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_x1 > 0 ) if ( Particles.n_recv_x1 > 0 ) std::cout + // << " Recv X1: " << Particles.n_recv_x1 << " " << procID << " from " + // << source[1] << std::endl; + MPI_Irecv(recv_buffer_x1_particles, buffer_length, MPI_CHREAL, source[1], 1, world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 2){ + if (index == 2) { buffer_length = Particles.n_recv_y0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_y0 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_y0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_y0) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_y0_particles, Particles.G.recv_buffer_size_y0, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_y0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_y0_particles , &buffer_length_particles_y0_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, &buffer_length_particles_y0_recv, buffer_length); + #endif #endif #ifdef PARTICLES_CPU - 
Check_and_Grow_Particles_Buffer( &recv_buffer_y0_particles , &buffer_length_particles_y0_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_y0_particles, &buffer_length_particles_y0_recv, buffer_length); #endif - // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " << Particles.n_recv_y0 << std::endl; - MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_y0 > 0 ) std::cout << " Recv Y0: " << + // Particles.n_recv_y0 << std::endl; + MPI_Irecv(recv_buffer_y0_particles, buffer_length, MPI_CHREAL, source[2], 2, world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 3){ + if (index == 3) { buffer_length = Particles.n_recv_y1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_y1 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_y1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_y1) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_y1_particles, Particles.G.recv_buffer_size_y1, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_y1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_y1_particles , &buffer_length_particles_y1_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, &buffer_length_particles_y1_recv, buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_y1_particles , &buffer_length_particles_y1_recv, buffer_length ); + 
Check_and_Grow_Particles_Buffer(&recv_buffer_y1_particles, &buffer_length_particles_y1_recv, buffer_length); #endif - // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " << Particles.n_recv_y1 << std::endl; - MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_y1 > 0 ) std::cout << " Recv Y1: " << + // Particles.n_recv_y1 << std::endl; + MPI_Irecv(recv_buffer_y1_particles, buffer_length, MPI_CHREAL, source[3], 3, world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 4){ + if (index == 4) { buffer_length = Particles.n_recv_z0 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_z0 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_z0 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_z0) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_z0_particles, Particles.G.recv_buffer_size_z0, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_z0 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_z0_particles , &buffer_length_particles_z0_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, &buffer_length_particles_z0_recv, buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_z0_particles , &buffer_length_particles_z0_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_z0_particles, &buffer_length_particles_z0_recv, buffer_length); #endif - // 
if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " << Particles.n_recv_z0 << std::endl; - MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_z0 > 0 ) std::cout << " Recv Z0: " << + // Particles.n_recv_z0 << std::endl; + MPI_Irecv(recv_buffer_z0_particles, buffer_length, MPI_CHREAL, source[4], 4, world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } - if ( index == 5){ + if (index == 5) { buffer_length = Particles.n_recv_z1 * N_DATA_PER_PARTICLE_TRANSFER; #ifdef PARTICLES_GPU - #ifdef MPI_GPU - if ( buffer_length > Particles.G.recv_buffer_size_z1 ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, Particles.G.gpu_allocation_factor*buffer_length, true ); - Particles.G.recv_buffer_size_z1 = (part_int_t) Particles.G.gpu_allocation_factor*buffer_length; + #ifdef MPI_GPU + if (buffer_length > Particles.G.recv_buffer_size_z1) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&recv_buffer_z1_particles, Particles.G.recv_buffer_size_z1, + Particles.G.gpu_allocation_factor * buffer_length, true); + Particles.G.recv_buffer_size_z1 = (part_int_t)Particles.G.gpu_allocation_factor * buffer_length; } - #else - Check_and_Grow_Particles_Buffer( &recv_buffer_z1_particles , &buffer_length_particles_z1_recv, buffer_length ); - #endif + #else + Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, &buffer_length_particles_z1_recv, buffer_length); + #endif #endif #ifdef PARTICLES_CPU - Check_and_Grow_Particles_Buffer( &recv_buffer_z1_particles , &buffer_length_particles_z1_recv, buffer_length ); + Check_and_Grow_Particles_Buffer(&recv_buffer_z1_particles, &buffer_length_particles_z1_recv, buffer_length); #endif - // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " << Particles.n_recv_z1 << std::endl; - 
MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, world, &recv_request_particles_transfer[*ireq_particles_transfer]); + // if ( Particles.n_recv_z1 >0 ) std::cout << " Recv Z1: " << + // Particles.n_recv_z1 << std::endl; + MPI_Irecv(recv_buffer_z1_particles, buffer_length, MPI_CHREAL, source[5], 5, world, + &recv_request_particles_transfer[*ireq_particles_transfer]); } *ireq_particles_transfer += 1; } - -//Make Send and Receive request for the number of particles that will be transferred, and then load and send the transfer particles -void Grid3D::Load_and_Send_Particles_X0( int ireq_n_particles, int ireq_particles_transfer ){ +// Make Send and Receive request for the number of particles that will be +// transferred, and then load and send the transfer particles +void Grid3D::Load_and_Send_Particles_X0(int ireq_n_particles, int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_x0_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_x0_particles = d_send_buffer_x0_particles; - Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send); + #endif // PARTICLES_GPU MPI_Irecv(&Particles.n_recv_x0, 1, MPI_PART_INT, source[0], 0, world, &recv_request_n_particles[ireq_n_particles]); - MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_x0, 1, MPI_PART_INT, dest[0], 1, world, &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); - // if ( Particles.n_send_x0 > 0 ) if ( Particles.n_send_x0 > 0 ) std::cout << " Sent X0: " << Particles.n_send_x0 << " " << procID << " to " << dest[0] << std::endl; + // if ( Particles.n_send_x0 > 0 ) if ( Particles.n_send_x0 > 0 ) std::cout + // << " Sent X0: " << Particles.n_send_x0 << " " << procID << " to " << + // 
dest[0] << std::endl; buffer_length = Particles.n_send_x0 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_x0_particles = h_send_buffer_x0_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_x0_particles , &buffer_length_particles_x0_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send ); - #endif //PARTICLES_CPU + Check_and_Grow_Particles_Buffer(&send_buffer_x0_particles, &buffer_length_particles_x0_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(0, 0, send_buffer_x0_particles, buffer_length_particles_x0_send); + #endif // PARTICLES_CPU - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + cudaMemcpy(h_send_buffer_x0_particles, d_send_buffer_x0_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_x0_particles = h_send_buffer_x0_particles; - #endif + #endif - MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_x0_particles, buffer_length, MPI_CHREAL, dest[0], 1, world, + &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_X1( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_X1(int ireq_n_particles, int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_x1_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_x1_particles = d_send_buffer_x1_particles; - Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, 
buffer_length_particles_x1_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send); + #endif // PARTICLES_GPU MPI_Irecv(&Particles.n_recv_x1, 1, MPI_PART_INT, source[1], 1, world, &recv_request_n_particles[ireq_n_particles]); - MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, &send_request_n_particles[1]); - MPI_Request_free(send_request_n_particles+1); - // if ( Particles.n_send_x1 > 0 ) std::cout << " Sent X1: " << Particles.n_send_x1 << std::endl; + MPI_Isend(&Particles.n_send_x1, 1, MPI_PART_INT, dest[1], 0, world, &send_request_n_particles[1]); + MPI_Request_free(send_request_n_particles + 1); + // if ( Particles.n_send_x1 > 0 ) std::cout << " Sent X1: " << + // Particles.n_send_x1 << std::endl; buffer_length = Particles.n_send_x1 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_x1_particles = h_send_buffer_x1_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_x1_particles , &buffer_length_particles_x1_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send ); - #endif //PARTICLES_CPU + Check_and_Grow_Particles_Buffer(&send_buffer_x1_particles, &buffer_length_particles_x1_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(0, 1, send_buffer_x1_particles, buffer_length_particles_x1_send); + #endif // PARTICLES_CPU - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + cudaMemcpy(h_send_buffer_x1_particles, d_send_buffer_x1_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_x1_particles = h_send_buffer_x1_particles; - #endif + #endif - MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, world, 
&send_request_particles_transfer[ireq_particles_transfer]);\ - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_x1_particles, buffer_length, MPI_CHREAL, dest[1], 0, world, + &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Y0( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Y0(int ireq_n_particles, int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_y0_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_y0_particles = d_send_buffer_y0_particles; - Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send); + #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_y0, 1, MPI_PART_INT, dest[2], 3, world, &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); MPI_Irecv(&Particles.n_recv_y0, 1, MPI_PART_INT, source[2], 2, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_y0 > 0 ) std::cout << " Sent Y0: " << Particles.n_send_y0 << std::endl; + // if ( Particles.n_send_y0 > 0 ) std::cout << " Sent Y0: " << + // Particles.n_send_y0 << std::endl; buffer_length = Particles.n_send_y0 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_y0_particles = h_send_buffer_y0_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_y0_particles , &buffer_length_particles_y0_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send ); - #endif //PARTICLES_CPU + 
Check_and_Grow_Particles_Buffer(&send_buffer_y0_particles, &buffer_length_particles_y0_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(1, 0, send_buffer_y0_particles, buffer_length_particles_y0_send); + #endif // PARTICLES_CPU - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + cudaMemcpy(h_send_buffer_y0_particles, d_send_buffer_y0_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_y0_particles = h_send_buffer_y0_particles; - #endif + #endif - MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_y0_particles, buffer_length, MPI_CHREAL, dest[2], 3, world, + &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Y1( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Y1(int ireq_n_particles, int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_y1_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_y1_particles = d_send_buffer_y1_particles; - Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send); + #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, &send_request_n_particles[1]); - MPI_Request_free(send_request_n_particles+1); + MPI_Isend(&Particles.n_send_y1, 1, MPI_PART_INT, dest[3], 2, world, &send_request_n_particles[1]); + 
MPI_Request_free(send_request_n_particles + 1); MPI_Irecv(&Particles.n_recv_y1, 1, MPI_PART_INT, source[3], 3, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_y1 > 0 ) std::cout << " Sent Y1: " << Particles.n_send_y1 << std::endl; + // if ( Particles.n_send_y1 > 0 ) std::cout << " Sent Y1: " << + // Particles.n_send_y1 << std::endl; buffer_length = Particles.n_send_y1 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_y1_particles = h_send_buffer_y1_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_y1_particles , &buffer_length_particles_y1_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send ); - #endif //PARTICLES_CPU + Check_and_Grow_Particles_Buffer(&send_buffer_y1_particles, &buffer_length_particles_y1_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(1, 1, send_buffer_y1_particles, buffer_length_particles_y1_send); + #endif // PARTICLES_CPU - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + cudaMemcpy(h_send_buffer_y1_particles, d_send_buffer_y1_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_y1_particles = h_send_buffer_y1_particles; - #endif + #endif - MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_y1_particles, buffer_length, MPI_CHREAL, dest[3], 2, world, + &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Z0( int ireq_n_particles, int ireq_particles_transfer ){ +void 
Grid3D::Load_and_Send_Particles_Z0(int ireq_n_particles, int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_z0_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_z0_particles = d_send_buffer_z0_particles; - Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send); + #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, &send_request_n_particles[0]); + MPI_Isend(&Particles.n_send_z0, 1, MPI_PART_INT, dest[4], 5, world, &send_request_n_particles[0]); MPI_Request_free(send_request_n_particles); MPI_Irecv(&Particles.n_recv_z0, 1, MPI_PART_INT, source[4], 4, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_z0 > 0 ) std::cout << " Sent Z0: " << Particles.n_send_z0 << std::endl; + // if ( Particles.n_send_z0 > 0 ) std::cout << " Sent Z0: " << + // Particles.n_send_z0 << std::endl; buffer_length = Particles.n_send_z0 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_z0_particles = h_send_buffer_z0_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_z0_particles , &buffer_length_particles_z0_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send ); - #endif //PARTICLES_CPU + Check_and_Grow_Particles_Buffer(&send_buffer_z0_particles, &buffer_length_particles_z0_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(2, 0, send_buffer_z0_particles, buffer_length_particles_z0_send); + #endif // PARTICLES_CPU - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_z0_particles, d_send_buffer_z0_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + cudaMemcpy(h_send_buffer_z0_particles, 
d_send_buffer_z0_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_z0_particles = h_send_buffer_z0_particles; - #endif + #endif - MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_z0_particles, buffer_length, MPI_CHREAL, dest[4], 5, world, + &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -void Grid3D::Load_and_Send_Particles_Z1( int ireq_n_particles, int ireq_particles_transfer ){ +void Grid3D::Load_and_Send_Particles_Z1(int ireq_n_particles, int ireq_particles_transfer) +{ int buffer_length; Real *send_buffer_z1_particles; - #ifdef PARTICLES_GPU + #ifdef PARTICLES_GPU send_buffer_z1_particles = d_send_buffer_z1_particles; - Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send ); - #endif //PARTICLES_GPU + Particles.Load_Particles_to_Buffer_GPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send); + #endif // PARTICLES_GPU - MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, &send_request_n_particles[1]); - MPI_Request_free(send_request_n_particles+1); + MPI_Isend(&Particles.n_send_z1, 1, MPI_PART_INT, dest[5], 4, world, &send_request_n_particles[1]); + MPI_Request_free(send_request_n_particles + 1); MPI_Irecv(&Particles.n_recv_z1, 1, MPI_PART_INT, source[5], 5, world, &recv_request_n_particles[ireq_n_particles]); - // if ( Particles.n_send_z1 > 0 ) std::cout << " Sent Z1: " << Particles.n_send_z1 << std::endl; + // if ( Particles.n_send_z1 > 0 ) std::cout << " Sent Z1: " << + // Particles.n_send_z1 << std::endl; buffer_length = Particles.n_send_z1 * N_DATA_PER_PARTICLE_TRANSFER; - #ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU send_buffer_z1_particles = 
h_send_buffer_z1_particles; - Check_and_Grow_Particles_Buffer( &send_buffer_z1_particles , &buffer_length_particles_z1_send, buffer_length ); - Particles.Load_Particles_to_Buffer_CPU( 2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send ); - #endif //PARTICLES_CPU + Check_and_Grow_Particles_Buffer(&send_buffer_z1_particles, &buffer_length_particles_z1_send, buffer_length); + Particles.Load_Particles_to_Buffer_CPU(2, 1, send_buffer_z1_particles, buffer_length_particles_z1_send); + #endif // PARTICLES_CPU - #if defined(PARTICLES_GPU) && !defined(MPI_GPU) - cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles, - buffer_length*sizeof(Real), cudaMemcpyDeviceToHost); + #if defined(PARTICLES_GPU) && !defined(MPI_GPU) + cudaMemcpy(h_send_buffer_z1_particles, d_send_buffer_z1_particles, buffer_length * sizeof(Real), + cudaMemcpyDeviceToHost); send_buffer_z1_particles = h_send_buffer_z1_particles; - #endif + #endif - MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, world, &send_request_particles_transfer[ireq_particles_transfer]); - MPI_Request_free(send_request_particles_transfer+ireq_particles_transfer); + MPI_Isend(send_buffer_z1_particles, buffer_length, MPI_CHREAL, dest[5], 4, world, + &send_request_particles_transfer[ireq_particles_transfer]); + MPI_Request_free(send_request_particles_transfer + ireq_particles_transfer); } -//Unload the Transferred particles from the MPI_buffer, after buffer was received -void Grid3D::Unload_Particles_from_Buffer_X0( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU - 
cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles, - buffer_length_particles_x0_recv*sizeof(Real), +// Unload the Transferred particles from the MPI_buffer, after buffer was +// received +void Grid3D::Unload_Particles_from_Buffer_X0(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 0, 0, h_recv_buffer_x0_particles, Particles.n_recv_x0, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU + cudaMemcpy(d_recv_buffer_x0_particles, h_recv_buffer_x0_particles, buffer_length_particles_x0_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 0, 0, d_recv_buffer_x0_particles, Particles.n_recv_x0 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(0, 0, d_recv_buffer_x0_particles, Particles.n_recv_x0); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_X1( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send, buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles, - buffer_length_particles_x1_recv*sizeof(Real), +void Grid3D::Unload_Particles_from_Buffer_X1(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 0, 1, h_recv_buffer_x1_particles, Particles.n_recv_x1, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + 
h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU + cudaMemcpy(d_recv_buffer_x1_particles, h_recv_buffer_x1_particles, buffer_length_particles_x1_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 0, 1, d_recv_buffer_x1_particles, Particles.n_recv_x1 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(0, 1, d_recv_buffer_x1_particles, Particles.n_recv_x1); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Y0( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles, - buffer_length_particles_y0_recv*sizeof(Real), +void Grid3D::Unload_Particles_from_Buffer_Y0(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 1, 0, h_recv_buffer_y0_particles, Particles.n_recv_y0, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU + cudaMemcpy(d_recv_buffer_y0_particles, h_recv_buffer_y0_particles, buffer_length_particles_y0_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - 
Particles.Unload_Particles_from_Buffer_GPU( 1, 0, d_recv_buffer_y0_particles, Particles.n_recv_y0 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(1, 0, d_recv_buffer_y0_particles, Particles.n_recv_y0); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Y1( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles, - buffer_length_particles_y1_recv*sizeof(Real), +void Grid3D::Unload_Particles_from_Buffer_Y1(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 1, 1, h_recv_buffer_y1_particles, Particles.n_recv_y1, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU + cudaMemcpy(d_recv_buffer_y1_particles, h_recv_buffer_y1_particles, buffer_length_particles_y1_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 1, 1, d_recv_buffer_y1_particles, Particles.n_recv_y1 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(1, 1, d_recv_buffer_y1_particles, Particles.n_recv_y1); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Z0( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, 
- h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles, - buffer_length_particles_z0_recv*sizeof(Real), +void Grid3D::Unload_Particles_from_Buffer_Z0(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 2, 0, h_recv_buffer_z0_particles, Particles.n_recv_z0, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU + cudaMemcpy(d_recv_buffer_z0_particles, h_recv_buffer_z0_particles, buffer_length_particles_z0_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 2, 0, d_recv_buffer_z0_particles, Particles.n_recv_z0 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(2, 0, d_recv_buffer_z0_particles, Particles.n_recv_z0); + #endif // PARTICLES_GPU } -void Grid3D::Unload_Particles_from_Buffer_Z1( int *flags ){ - #ifdef PARTICLES_CPU - Particles.Unload_Particles_from_Buffer_CPU( 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, - h_send_buffer_y0_particles, h_send_buffer_y1_particles, h_send_buffer_z0_particles, - h_send_buffer_z1_particles, buffer_length_particles_y0_send , buffer_length_particles_y1_send, - buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); - #endif//PARTICLES_CPU - #ifdef PARTICLES_GPU - #ifndef MPI_GPU - cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles, - buffer_length_particles_z1_recv*sizeof(Real), +void 
Grid3D::Unload_Particles_from_Buffer_Z1(int *flags) +{ + #ifdef PARTICLES_CPU + Particles.Unload_Particles_from_Buffer_CPU( + 2, 1, h_recv_buffer_z1_particles, Particles.n_recv_z1, h_send_buffer_y0_particles, h_send_buffer_y1_particles, + h_send_buffer_z0_particles, h_send_buffer_z1_particles, buffer_length_particles_y0_send, + buffer_length_particles_y1_send, buffer_length_particles_z0_send, buffer_length_particles_z1_send, flags); + #endif // PARTICLES_CPU + #ifdef PARTICLES_GPU + #ifndef MPI_GPU + cudaMemcpy(d_recv_buffer_z1_particles, h_recv_buffer_z1_particles, buffer_length_particles_z1_recv * sizeof(Real), cudaMemcpyHostToDevice); - #endif - Particles.Unload_Particles_from_Buffer_GPU( 2, 1, d_recv_buffer_z1_particles, Particles.n_recv_z1 ); - #endif//PARTICLES_GPU + #endif + Particles.Unload_Particles_from_Buffer_GPU(2, 1, d_recv_buffer_z1_particles, Particles.n_recv_z1); + #endif // PARTICLES_GPU } - -//Find the particles that moved outside the local domain in order to transfer them. -void Particles_3D::Select_Particles_to_Transfer_All( int *flags ){ - - #ifdef PARTICLES_CPU - Select_Particles_to_Transfer_All_CPU( flags ); - #endif//PARTICLES_CPU +// Find the particles that moved outside the local domain in order to transfer +// them. +void Particles3D::Select_Particles_to_Transfer_All(int *flags) +{ + #ifdef PARTICLES_CPU + Select_Particles_to_Transfer_All_CPU(flags); + #endif // PARTICLES_CPU // When using PARTICLES_GPU the particles that need to be Transferred // are selected on the Load_Buffer_GPU functions - } - -void Particles_3D::Clear_Particles_For_Transfer( void ){ - - //Set the number of transferred particles to 0. +void Particles3D::Clear_Particles_For_Transfer(void) +{ + // Set the number of transferred particles to 0. n_transfer_x0 = 0; n_transfer_x1 = 0; n_transfer_y0 = 0; @@ -603,7 +668,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_transfer_z0 = 0; n_transfer_z1 = 0; - //Set the number of send particles to 0. 
+ // Set the number of send particles to 0. n_send_x0 = 0; n_send_x1 = 0; n_send_y0 = 0; @@ -611,7 +676,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_send_z0 = 0; n_send_z1 = 0; - //Set the number of received particles to 0. + // Set the number of received particles to 0. n_recv_x0 = 0; n_recv_x1 = 0; n_recv_y0 = 0; @@ -619,7 +684,7 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_recv_z0 = 0; n_recv_z1 = 0; - //Set the number of particles in transfer buffers to 0. + // Set the number of particles in transfer buffers to 0. n_in_buffer_x0 = 0; n_in_buffer_x1 = 0; n_in_buffer_y0 = 0; @@ -627,264 +692,355 @@ void Particles_3D::Clear_Particles_For_Transfer( void ){ n_in_buffer_z0 = 0; n_in_buffer_z1 = 0; - - #ifdef PARTICLES_CPU - //Clear the particles indices that were transferred during the previous timestep + #ifdef PARTICLES_CPU + // Clear the particles indices that were transferred during the previous + // timestep Clear_Vectors_For_Transfers(); - #endif //PARTICLES_CPU - + #endif // PARTICLES_CPU } -#ifdef PARTICLES_GPU - -int Particles_3D::Select_Particles_to_Transfer_GPU( int direction, int side ){ + #ifdef PARTICLES_GPU +int Particles3D::Select_Particles_to_Transfer_GPU(int direction, int side) +{ int n_transfer; Real *pos; Real domainMin, domainMax; - if ( direction == 0 ){ - pos = pos_x_dev; + if (direction == 0) { + pos = pos_x_dev; domainMax = G.xMax; domainMin = G.xMin; } - if ( direction == 1 ){ - pos = pos_y_dev; + if (direction == 1) { + pos = pos_y_dev; domainMax = G.yMax; domainMin = G.yMin; } - if ( direction == 2 ){ - pos = pos_z_dev; + if (direction == 2) { + pos = pos_z_dev; domainMax = G.zMax; domainMin = G.zMin; } - - //Set the number of particles that will be sent and load the particles data into the transfer buffers - n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, G.transfer_particles_indices_d, 
G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, G.transfer_particles_prefix_sum_blocks_d ); - CHECK(cudaDeviceSynchronize()); + // chprintf("n_local=%d SELECT PARTICLES: %d dir, %d side. Max/Min %.4e/%.4e + // \n", n_local, direction, side, domainMax, domainMin); Set the number of + // particles that will be sent and load the particles data into the transfer + // buffers + n_transfer = Select_Particles_to_Transfer_GPU_function( + n_local, side, domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, G.transfer_particles_flags_d, + G.transfer_particles_indices_d, G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, + G.transfer_particles_prefix_sum_blocks_d); + GPU_Error_Check(cudaDeviceSynchronize()); return n_transfer; } -void Particles_3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, int buffer_length ){ - +void Particles3D::Copy_Transfer_Particles_to_Buffer_GPU(int n_transfer, int direction, int side, Real *send_buffer_h, + int buffer_length) +{ part_int_t *n_send; int *buffer_size; int n_fields_to_transfer; Real *pos, *send_buffer_d; Real domainMin, domainMax; int bt_pos_x, bt_pos_y, bt_pos_z, bt_non_pos; + int field_id = -1; - bt_pos_x = -1; - bt_pos_y = -1; - bt_pos_z = -1; + bt_pos_x = -1; + bt_pos_y = -1; + bt_pos_z = -1; bt_non_pos = -1; - if ( direction == 0 ){ - pos = pos_x_dev; + if (direction == 0) { + pos = pos_x_dev; domainMin = G.domainMin_x; domainMax = G.domainMax_x; - if ( side == 0 ){ - n_send = &n_send_x0; - buffer_size = &G.send_buffer_size_x0; + if (side == 0) { + n_send = &n_send_x0; + buffer_size = &G.send_buffer_size_x0; send_buffer_d = G.send_buffer_x0_d; - bt_pos_x = G.boundary_type_x0; + bt_pos_x = G.boundary_type_x0; } - if ( side == 1 ){ - n_send = &n_send_x1; - buffer_size = &G.send_buffer_size_x1; + if (side == 1) { + n_send = &n_send_x1; + buffer_size = &G.send_buffer_size_x1; send_buffer_d = G.send_buffer_x1_d; - bt_pos_x = G.boundary_type_x1; 
+ bt_pos_x = G.boundary_type_x1; } } - if ( direction == 1 ){ - pos = pos_y_dev; + if (direction == 1) { + pos = pos_y_dev; domainMin = G.domainMin_y; domainMax = G.domainMax_y; - if ( side == 0 ){ - n_send = &n_send_y0; - buffer_size = &G.send_buffer_size_y0; + if (side == 0) { + n_send = &n_send_y0; + buffer_size = &G.send_buffer_size_y0; send_buffer_d = G.send_buffer_y0_d; - bt_pos_y = G.boundary_type_y0; + bt_pos_y = G.boundary_type_y0; } - if ( side == 1 ){ - n_send = &n_send_y1; - buffer_size = &G.send_buffer_size_y1; + if (side == 1) { + n_send = &n_send_y1; + buffer_size = &G.send_buffer_size_y1; send_buffer_d = G.send_buffer_y1_d; - bt_pos_y = G.boundary_type_y1; + bt_pos_y = G.boundary_type_y1; } } - if ( direction == 2 ){ - pos = pos_z_dev; + if (direction == 2) { + pos = pos_z_dev; domainMin = G.domainMin_z; domainMax = G.domainMax_z; - if ( side == 0 ){ - n_send = &n_send_z0; - buffer_size = &G.send_buffer_size_z0; + if (side == 0) { + n_send = &n_send_z0; + buffer_size = &G.send_buffer_size_z0; send_buffer_d = G.send_buffer_z0_d; - bt_pos_z = G.boundary_type_z0; + bt_pos_z = G.boundary_type_z0; } - if ( side == 1 ){ - n_send = &n_send_z1; - buffer_size = &G.send_buffer_size_z1; + if (side == 1) { + n_send = &n_send_z1; + buffer_size = &G.send_buffer_size_z1; send_buffer_d = G.send_buffer_z1_d; - bt_pos_z = G.boundary_type_z1; + bt_pos_z = G.boundary_type_z1; } } - - - // If the number of particles in the array exceeds the size of the array, extend the array - if ( (*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER > *buffer_size ){ - printf( "Extending Particles Transfer Buffer "); - Extend_GPU_Array_Real( &send_buffer_d, *buffer_size, G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER, true ); - *buffer_size = (part_int_t) G.gpu_allocation_factor*(*n_send + n_transfer)*N_DATA_PER_PARTICLE_TRANSFER; + // If the number of particles in the array exceeds the size of the array, + // extend the array + if ((*n_send + n_transfer) * 
N_DATA_PER_PARTICLE_TRANSFER > *buffer_size) { + printf("Extending Particles Transfer Buffer "); + Extend_GPU_Array(&send_buffer_d, *buffer_size, + G.gpu_allocation_factor * (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER, true); + *buffer_size = (part_int_t)G.gpu_allocation_factor * (*n_send + n_transfer) * N_DATA_PER_PARTICLE_TRANSFER; } // Load the particles that will be transferred into the buffers n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Load_Particles_to_Transfer_GPU_function( n_transfer, 0, n_fields_to_transfer, pos_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_x ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 1, n_fields_to_transfer, pos_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_y ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 2, n_fields_to_transfer, pos_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_pos_z ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 3, n_fields_to_transfer, vel_x_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 4, n_fields_to_transfer, vel_y_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - Load_Particles_to_Transfer_GPU_function( n_transfer, 5, n_fields_to_transfer, vel_z_dev, G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, bt_non_pos ); - - CHECK(cudaDeviceSynchronize()); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_x_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_x); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, pos_y_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_y); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, 
pos_z_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_pos_z); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_x_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_y_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, vel_z_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #ifndef SINGLE_PARTICLE_MASS + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, mass_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #endif + #ifdef PARTICLE_IDS + Load_Particles_to_Transfer_Int_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, partIDs_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #endif + #ifdef PARTICLE_AGE + Load_Particles_to_Transfer_GPU_function(n_transfer, ++field_id, n_fields_to_transfer, age_dev, + G.transfer_particles_indices_d, send_buffer_d, domainMin, domainMax, + bt_non_pos); + #endif + GPU_Error_Check(cudaDeviceSynchronize()); *n_send += n_transfer; // if ( *n_send > 0 ) printf( "###Transfered %ld particles\n", *n_send); - - } - -void Particles_3D::Replace_Tranfered_Particles_GPU( int n_transfer ){ - +void Particles3D::Replace_Tranfered_Particles_GPU(int n_transfer) +{ // Replace the particles that were transferred - Replace_Transfered_Particles_GPU_function( n_transfer, pos_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, pos_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, pos_z_dev, 
G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, vel_x_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, vel_y_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - Replace_Transfered_Particles_GPU_function( n_transfer, vel_z_dev, G.transfer_particles_indices_d, G.replace_particles_indices_d, false ); - - CHECK(cudaDeviceSynchronize()); + Replace_Transfered_Particles_GPU_function(n_transfer, pos_x_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, pos_y_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, pos_z_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, vel_x_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, vel_y_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + Replace_Transfered_Particles_GPU_function(n_transfer, vel_z_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #ifndef SINGLE_PARTICLE_MASS + Replace_Transfered_Particles_GPU_function(n_transfer, mass_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #endif + #ifdef PARTICLE_IDS + Replace_Transfered_Particles_Int_GPU_function(n_transfer, partIDs_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #endif + #ifdef PARTICLE_AGE + Replace_Transfered_Particles_GPU_function(n_transfer, age_dev, G.transfer_particles_indices_d, + G.replace_particles_indices_d, false); + #endif + + GPU_Error_Check(cudaDeviceSynchronize()); // Update the local number of particles n_local -= 
n_transfer; - } - -void Particles_3D::Load_Particles_to_Buffer_GPU( int direction, int side, Real *send_buffer_h, int buffer_length ){ - +void Particles3D::Load_Particles_to_Buffer_GPU(int direction, int side, Real *send_buffer_h, int buffer_length) +{ int n_transfer; + n_transfer = Select_Particles_to_Transfer_GPU(direction, side); - n_transfer = Select_Particles_to_Transfer_GPU( direction, side ); - - Copy_Transfer_Particles_to_Buffer_GPU( n_transfer, direction, side, send_buffer_h, buffer_length ); - - Replace_Tranfered_Particles_GPU( n_transfer ); + Copy_Transfer_Particles_to_Buffer_GPU(n_transfer, direction, side, send_buffer_h, buffer_length); + Replace_Tranfered_Particles_GPU(n_transfer); } +/** + * Open boundary conditions follows the same logic as + * Load_Particles_to_Buffer_GPU, except that the particles that are selected for + * transfer are not moved into any buffer (Copy_Transfer_Particles_to_Buffer_GPU + * step is skipped). Also the domainMix/domainMax are the global min/max + * values. 
+ */ +void Particles3D::Set_Particles_Open_Boundary_GPU(int dir, int side) +{ + int n_transfer; + /*Real *pos; + Real domainMin, domainMax; -void Particles_3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d ){ + if ( dir == 0 ){ + domainMin = G.domainMin_x; + domainMax = G.domainMax_x; + } + if ( dir == 1 ){ + domainMin = G.domainMin_y; + domainMax = G.domainMax_y; + } + if ( dir == 2 ){ + domainMin = G.domainMin_z; + domainMax = G.domainMax_z; + }*/ + n_transfer = Select_Particles_to_Transfer_GPU(dir, side); + // n_transfer = Select_Particles_to_Transfer_GPU_function( n_local, side, + // domainMin, domainMax, pos, G.n_transfer_d, G.n_transfer_h, + // G.transfer_particles_flags_d, G.transfer_particles_indices_d, + // G.replace_particles_indices_d, G.transfer_particles_prefix_sum_d, + // G.transfer_particles_prefix_sum_blocks_d ); + // GPU_Error_Check(cudaDeviceSynchronize()); + // chprintf("OPEN condition: removing %d\n", n_transfer); + Replace_Tranfered_Particles_GPU(n_transfer); +} +void Particles3D::Copy_Transfer_Particles_from_Buffer_GPU(int n_recv, Real *recv_buffer_d) +{ int n_fields_to_transfer; part_int_t n_local_after = n_local + n_recv; - if ( n_local_after > particles_array_size ){ - printf(" Reallocating GPU particles arrays. N local particles: %ld \n", n_local_after ); + if (n_local_after > particles_array_size) { + printf(" Reallocating GPU particles arrays. 
N local particles: %ld \n", n_local_after); int new_size = G.gpu_allocation_factor * n_local_after; - Extend_GPU_Array_Real( &pos_x_dev, (int) particles_array_size, new_size, true ); - Extend_GPU_Array_Real( &pos_y_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &pos_z_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &vel_x_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &vel_y_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &vel_z_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &grav_x_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &grav_y_dev, (int) particles_array_size, new_size, false ); - Extend_GPU_Array_Real( &grav_z_dev, (int) particles_array_size, new_size, false ); - particles_array_size = (part_int_t) new_size; + Extend_GPU_Array(&pos_x_dev, (int)particles_array_size, new_size, true); + Extend_GPU_Array(&pos_y_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&pos_z_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&vel_x_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&vel_y_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&vel_z_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&grav_x_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&grav_y_dev, (int)particles_array_size, new_size, false); + Extend_GPU_Array(&grav_z_dev, (int)particles_array_size, new_size, false); + #ifndef SINGLE_PARTICLE_MASS + Extend_GPU_Array(&mass_dev, (int)particles_array_size, new_size, false); + #endif + #ifdef PARTICLE_IDS + Extend_GPU_Array(&partIDs_dev, (int)particles_array_size, new_size, false); + #endif + #ifdef PARTICLE_AGE + Extend_GPU_Array(&age_dev, (int)particles_array_size, new_size, false); + #endif + particles_array_size = (part_int_t)new_size; ReAllocate_Memory_GPU_MPI(); } 
// Unload the particles that were transferred from the buffers + int field_id = -1; n_fields_to_transfer = N_DATA_PER_PARTICLE_TRANSFER; - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 0, n_fields_to_transfer, pos_x_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 1, n_fields_to_transfer, pos_y_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 2, n_fields_to_transfer, pos_z_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 3, n_fields_to_transfer, vel_x_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 4, n_fields_to_transfer, vel_y_dev, recv_buffer_d ); - Unload_Particles_to_Transfer_GPU_function( n_local, n_recv, 5, n_fields_to_transfer, vel_z_dev, recv_buffer_d ); - // + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_x_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_y_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, pos_z_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_x_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_y_dev, + recv_buffer_d); + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, vel_z_dev, + recv_buffer_d); + #ifndef SINGLE_PARTICLE_MASS + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, mass_dev, recv_buffer_d); + #endif + #ifdef PARTICLE_IDS + Unload_Particles_Int_to_Transfer_GPU_function(n_local, n_recv, ++field_id, n_fields_to_transfer, partIDs_dev, + recv_buffer_d); + #endif + #ifdef PARTICLE_AGE + Unload_Particles_to_Transfer_GPU_function(n_local, n_recv, ++field_id, 
n_fields_to_transfer, age_dev, recv_buffer_d); + #endif + n_local += n_recv; // if ( n_recv > 0 ) printf( "###Unloaded %d particles\n", n_recv ); - - } - - -void Particles_3D::Unload_Particles_from_Buffer_GPU( int direction, int side , Real *recv_buffer_h, int n_recv ){ - +void Particles3D::Unload_Particles_from_Buffer_GPU(int direction, int side, Real *recv_buffer_h, int n_recv) +{ int buffer_size; Real domainMin, domainMax; Real *recv_buffer_d; - if ( direction == 0 ){ + if (direction == 0) { domainMin = G.domainMin_x; domainMin = G.domainMax_x; - if ( side == 0 ){ + if (side == 0) { buffer_size = G.recv_buffer_size_x0; recv_buffer_d = G.recv_buffer_x0_d; } - if ( side == 1 ){ + if (side == 1) { buffer_size = G.recv_buffer_size_x1; recv_buffer_d = G.recv_buffer_x1_d; } } - if ( direction == 1 ){ + if (direction == 1) { domainMin = G.domainMin_y; domainMin = G.domainMax_y; - if ( side == 0 ){ + if (side == 0) { buffer_size = G.recv_buffer_size_y0; recv_buffer_d = G.recv_buffer_y0_d; } - if ( side == 1 ){ + if (side == 1) { buffer_size = G.recv_buffer_size_y1; recv_buffer_d = G.recv_buffer_y1_d; } } - if ( direction == 2 ){ + if (direction == 2) { domainMin = G.domainMin_z; domainMin = G.domainMax_z; - if ( side == 0 ){ + if (side == 0) { buffer_size = G.recv_buffer_size_z0; recv_buffer_d = G.recv_buffer_z0_d; } - if ( side == 1 ){ + if (side == 1) { buffer_size = G.recv_buffer_size_z1; recv_buffer_d = G.recv_buffer_z1_d; } } - CudaCheckError(); - - Copy_Transfer_Particles_from_Buffer_GPU( n_recv, recv_buffer_d ); + GPU_Error_Check(); + Copy_Transfer_Particles_from_Buffer_GPU(n_recv, recv_buffer_d); } + #endif // PARTICLES_GPU - - -#endif //PARTICLES_GPU - - - - - - -#endif //MPI_CHOLLA -#endif //PARTICLES + #endif // MPI_CHOLLA +#endif // PARTICLES diff --git a/src/particles/particles_boundaries_cpu.cpp b/src/particles/particles_boundaries_cpu.cpp index b90963b05..27470befe 100644 --- a/src/particles/particles_boundaries_cpu.cpp +++ 
b/src/particles/particles_boundaries_cpu.cpp @@ -1,67 +1,75 @@ #if defined(PARTICLES) && defined(PARTICLES_CPU) -#include -#include -#include -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "../particles/particles_3D.h" + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include + #include + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "particles_3D.h" -//Get and remove Real value at index on vector -Real Get_and_Remove_Real( part_int_t indx, real_vector_t &vec ){ + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif + +// Get and remove Real value at index on vector +Real Get_and_Remove_Real(part_int_t indx, real_vector_t &vec) +{ Real value = vec[indx]; - vec[indx] = vec.back(); //The item at the specified index is replaced by the last item in the vector - vec.pop_back(); //The last item in the vector is discarded + vec[indx] = vec.back(); // The item at the specified index is replaced by the + // last item in the vector + vec.pop_back(); // The last item in the vector is discarded return value; } -//Remove Real value at index on vector -void Remove_Real( part_int_t indx, real_vector_t &vec ){ - vec[indx] = vec.back(); //The item at the specified index is replaced by the last item in the vector - vec.pop_back(); //The last item in the vector is discarded +// Remove Real value at index on vector +void Remove_Real(part_int_t indx, real_vector_t &vec) +{ + vec[indx] = vec.back(); // The item at the specified index is replaced by the + // last item in the vector + vec.pop_back(); // The last item in the vector is discarded } -//Get and remove integer value at index on vector -Real Get_and_Remove_partID( part_int_t indx, int_vector_t &vec ){ - Real value = (Real) vec[indx]; - vec[indx] = vec.back(); +// Get and remove integer value at index on vector +Real Get_and_Remove_partID(part_int_t indx, int_vector_t &vec) +{ + Real value = (Real)vec[indx]; + vec[indx] = vec.back(); vec.pop_back(); return value; } 
-//Remove integer value at index on vector -void Remove_ID( part_int_t indx, int_vector_t &vec ){ +// Remove integer value at index on vector +void Remove_ID(part_int_t indx, int_vector_t &vec) +{ vec[indx] = vec.back(); vec.pop_back(); } -//Convert Real to Integer for transfering particles IDs on Real buffer arrays -part_int_t Real_to_part_int( Real inVal ){ - part_int_t outVal = (part_int_t) inVal; - if ( (inVal - outVal) > 0.1 ) outVal += 1; - if ( fabs(outVal - inVal) > 0.5 ) outVal -= 1; +// Convert Real to Integer for transfering particles IDs on Real buffer arrays +part_int_t Real_to_part_int(Real inVal) +{ + part_int_t outVal = (part_int_t)inVal; + if ((inVal - outVal) > 0.1) outVal += 1; + if (fabs(outVal - inVal) > 0.5) outVal -= 1; return outVal; } -//Set periodic boundaries for particles. Only when not using MPI -void Grid3D::Set_Particles_Boundary( int dir, int side ){ - +// Set periodic boundaries for particles. Only when not using MPI +void Grid3D::Set_Particles_Boundary(int dir, int side) +{ Real d_min, d_max, L; - if ( dir == 0 ){ + if (dir == 0) { d_min = Particles.G.xMin; d_max = Particles.G.xMax; } - if ( dir == 1 ){ + if (dir == 1) { d_min = Particles.G.yMin; d_max = Particles.G.yMax; } - if ( dir == 2 ){ + if (dir == 2) { d_min = Particles.G.zMin; d_max = Particles.G.zMax; } @@ -71,250 +79,252 @@ void Grid3D::Set_Particles_Boundary( int dir, int side ){ bool changed_pos; Real pos; #ifdef PARALLEL_OMP - #pragma omp parallel for private( pos, changed_pos) num_threads( N_OMP_THREADS ) + #pragma omp parallel for private(pos, changed_pos) num_threads(N_OMP_THREADS) #endif - for( int i=0; i= d_max ) pos -= L;//When the position is on the right of the domain boundary, substract the domain Length to the position + if (side == 1) { + if (pos >= d_max) + pos -= L; // When the position is on the right of the domain boundary, + // substract the domain Length to the position changed_pos = true; } - //If the position was changed write the new position to 
the vectors - if ( !changed_pos ) continue; - if ( dir == 0 ) Particles.pos_x[i] = pos; - if ( dir == 1 ) Particles.pos_y[i] = pos; - if ( dir == 2 ) Particles.pos_z[i] = pos; - + // If the position was changed write the new position to the vectors + if (!changed_pos) continue; + if (dir == 0) Particles.pos_x[i] = pos; + if (dir == 1) Particles.pos_y[i] = pos; + if (dir == 2) Particles.pos_z[i] = pos; } } +// Set open boundaries for particles when not using MPI +void Grid3D::Set_Particles_Open_Boundary_CPU(int dir, int side) +{ + Real d_min, d_max; -//Set open boundaries for particles when not using MPI -void Grid3D::Set_Particles_Open_Boundary( int dir, int side ){ - Real d_min, d_max, L; - - if ( dir == 0 ){ - d_min = Particles.G.xMin; - d_max = Particles.G.xMax; + if (dir == 0) { + d_min = Particles.G.domainMin_x; + d_max = Particles.G.domainMax_x; } - if ( dir == 1 ){ - d_min = Particles.G.yMin; - d_max = Particles.G.yMax; + if (dir == 1) { + d_min = Particles.G.domainMin_y; + d_max = Particles.G.domainMax_y; } - if ( dir == 2 ){ - d_min = Particles.G.zMin; - d_max = Particles.G.zMax; + if (dir == 2) { + d_min = Particles.G.domainMin_z; + d_max = Particles.G.domainMax_z; } - L = d_max - d_min; - Real pos; int_vector_t removed_indices; #ifdef PARALLEL_OMP - #pragma omp parallel for private(pos) num_threads( N_OMP_THREADS ) + #pragma omp parallel for private(pos) num_threads(N_OMP_THREADS) #endif - for( int i=0; i d_max)) removed_indices.push_back(i); + // If the position is out of the region, remove. 
+ if ((side == 0 && pos < d_min) || (side == 1 && pos > d_max)) removed_indices.push_back(i); } std::sort(removed_indices.begin(), removed_indices.end()); part_int_t indx, pIndx; part_int_t n_delete = removed_indices.size(); - for ( indx=0; indx= G.xMax && flags[1]==5 ){ - out_indxs_vec_x1.push_back( pIndx ); + if (pos_x[pIndx] >= G.xMax && flags[1] == 5) { + out_indxs_vec_x1.push_back(pIndx); continue; } - if ( pos_y[pIndx] < G.yMin && flags[2]==5 ){ - out_indxs_vec_y0.push_back( pIndx ); + if (pos_y[pIndx] < G.yMin && flags[2] == 5) { + out_indxs_vec_y0.push_back(pIndx); continue; } - if ( pos_y[pIndx] >= G.yMax && flags[3]==5 ){ - out_indxs_vec_y1.push_back( pIndx ); + if (pos_y[pIndx] >= G.yMax && flags[3] == 5) { + out_indxs_vec_y1.push_back(pIndx); continue; } - if ( pos_z[pIndx] < G.zMin && flags[4]==5 ){ - out_indxs_vec_z0.push_back( pIndx ); + if (pos_z[pIndx] < G.zMin && flags[4] == 5) { + out_indxs_vec_z0.push_back(pIndx); continue; } - if ( pos_z[pIndx] >= G.zMax && flags[5]==5 ){ - out_indxs_vec_z1.push_back( pIndx ); + if (pos_z[pIndx] >= G.zMax && flags[5] == 5) { + out_indxs_vec_z1.push_back(pIndx); continue; } } - //Sort the transfer Indices (NOT NEEDED: All indices are sorted at the end of the transfer before removing transferred particles ) - // std::sort(out_indxs_vec_x0.begin(), out_indxs_vec_x0.end()); - // std::sort(out_indxs_vec_x1.begin(), out_indxs_vec_x1.end()); - // std::sort(out_indxs_vec_y0.begin(), out_indxs_vec_y0.end()); - // std::sort(out_indxs_vec_y1.begin(), out_indxs_vec_y1.end()); - // std::sort(out_indxs_vec_z0.begin(), out_indxs_vec_z0.end()); - // std::sort(out_indxs_vec_z1.begin(), out_indxs_vec_z1.end()); - - //Add the size of the out_vectors to the number of particles that will be send in each direction + // Sort the transfer Indices (NOT NEEDED: All indices are sorted at the end of + // the transfer before removing transferred particles ) + // std::sort(out_indxs_vec_x0.begin(), out_indxs_vec_x0.end()); + // 
std::sort(out_indxs_vec_x1.begin(), out_indxs_vec_x1.end()); + // std::sort(out_indxs_vec_y0.begin(), out_indxs_vec_y0.end()); + // std::sort(out_indxs_vec_y1.begin(), out_indxs_vec_y1.end()); + // std::sort(out_indxs_vec_z0.begin(), out_indxs_vec_z0.end()); + // std::sort(out_indxs_vec_z1.begin(), out_indxs_vec_z1.end()); + + // Add the size of the out_vectors to the number of particles that will be + // send in each direction n_send_x0 += out_indxs_vec_x0.size(); n_send_x1 += out_indxs_vec_x1.size(); n_send_y0 += out_indxs_vec_y0.size(); n_send_y1 += out_indxs_vec_y1.size(); n_send_z0 += out_indxs_vec_z0.size(); n_send_z1 += out_indxs_vec_z1.size(); - } - -//Load the particles that need to be transferred to the MPI buffer -void Particles_3D::Load_Particles_to_Buffer_CPU( int direction, int side, Real *send_buffer, int buffer_length ){ - +// Load the particles that need to be transferred to the MPI buffer +void Particles3D::Load_Particles_to_Buffer_CPU(int direction, int side, Real *send_buffer, int buffer_length) +{ part_int_t n_out; part_int_t n_send; int_vector_t *out_indxs_vec; part_int_t *n_in_buffer; - //Depending on the direction and side select the vector with the particle indices for the transfer - if ( direction == 0 ){ - if ( side == 0 ){ + // Depending on the direction and side select the vector with the particle + // indices for the transfer + if (direction == 0) { + if (side == 0) { out_indxs_vec = &out_indxs_vec_x0; - n_send = n_send_x0; - n_in_buffer = &n_in_buffer_x0; + n_send = n_send_x0; + n_in_buffer = &n_in_buffer_x0; } - if ( side == 1 ){ + if (side == 1) { out_indxs_vec = &out_indxs_vec_x1; - n_send = n_send_x1; - n_in_buffer = &n_in_buffer_x1; + n_send = n_send_x1; + n_in_buffer = &n_in_buffer_x1; } } - if ( direction == 1 ){ - if ( side == 0 ){ + if (direction == 1) { + if (side == 0) { out_indxs_vec = &out_indxs_vec_y0; - n_send = n_send_y0; - n_in_buffer = &n_in_buffer_y0; + n_send = n_send_y0; + n_in_buffer = &n_in_buffer_y0; } - if ( 
side == 1 ){ + if (side == 1) { out_indxs_vec = &out_indxs_vec_y1; - n_send = n_send_y1; - n_in_buffer = &n_in_buffer_y1; + n_send = n_send_y1; + n_in_buffer = &n_in_buffer_y1; } } - if ( direction == 2 ){ - if ( side == 0 ){ + if (direction == 2) { + if (side == 0) { out_indxs_vec = &out_indxs_vec_z0; - n_send = n_send_z0; - n_in_buffer = &n_in_buffer_z0; + n_send = n_send_z0; + n_in_buffer = &n_in_buffer_z0; } - if ( side == 1 ){ + if (side == 1) { out_indxs_vec = &out_indxs_vec_z1; - n_send = n_send_z1; - n_in_buffer = &n_in_buffer_z1; + n_send = n_send_z1; + n_in_buffer = &n_in_buffer_z1; } } part_int_t offset, offset_extra; - n_out = out_indxs_vec->size(); //Number of particles to be transferred - offset = *n_in_buffer*N_DATA_PER_PARTICLE_TRANSFER; //Offset in the array to take in to account the particles that already reside in the buffer array + n_out = out_indxs_vec->size(); // Number of particles to be transferred + offset = *n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER; // Offset in the array to take in to + // account the particles that already + // reside in the buffer array part_int_t indx, pIndx; - for ( indx=0; indx buffer_length ) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; + // Check that the offset doesn't exceed the buffer size + if (offset > buffer_length) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; } } - -//Add the data of a single particle to a transfer buffer -void Particles_3D::Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, Real pAge, - Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, Real pVel_z){ - +// Add the data of a single particle to a transfer buffer +void Particles3D::Add_Particle_To_Buffer(Real *buffer, part_int_t n_in_buffer, int buffer_length, Real pId, Real pMass, + Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, Real pVel_x, Real pVel_y, + Real pVel_z) +{ int offset, offset_extra; 
offset = n_in_buffer * N_DATA_PER_PARTICLE_TRANSFER; - if (offset > buffer_length ) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; + if (offset > buffer_length) std::cout << "ERROR: Buffer length exceeded on particles transfer" << std::endl; buffer[offset + 0] = pPos_x; buffer[offset + 1] = pPos_y; buffer[offset + 2] = pPos_z; @@ -323,78 +333,79 @@ void Particles_3D::Add_Particle_To_Buffer( Real *buffer, part_int_t n_in_buffer, buffer[offset + 5] = pVel_z; offset_extra = offset + 5; - #ifndef SINGLE_PARTICLE_MASS + #ifndef SINGLE_PARTICLE_MASS offset_extra += 1; - buffer[ offset_extra ] = pMass; - #endif - #ifdef PARTICLE_IDS + buffer[offset_extra] = pMass; + #endif + #ifdef PARTICLE_IDS offset_extra += 1; buffer[offset_extra] = pId; - #endif - #ifdef PARTICLE_AGE + #endif + #ifdef PARTICLE_AGE offset_extra += 1; buffer[offset_extra] = pAge; - #endif + #endif } - -//After a particle was transferred, add the transferred particle data to the vectors that contain the data of the local particles -void Particles_3D::Add_Particle_To_Vectors( Real pId, Real pMass, Real pAge, - Real pPos_x, Real pPos_y, Real pPos_z, - Real pVel_x, Real pVel_y, Real pVel_z, int *flags ){ - +// After a particle was transferred, add the transferred particle data to the +// vectors that contain the data of the local particles +void Particles3D::Add_Particle_To_Vectors(Real pId, Real pMass, Real pAge, Real pPos_x, Real pPos_y, Real pPos_z, + Real pVel_x, Real pVel_y, Real pVel_z, int *flags) +{ // Make sure that the particle position is inside the local domain bool in_local = true; - if ( pPos_x < G.xMin || pPos_x >= G.xMax ) in_local = false; - if ( ( pPos_y < G.yMin && flags[2]==5 ) || ( pPos_y >= G.yMax && flags[3]==5 ) ) in_local = false; - if ( ( pPos_z < G.zMin && flags[4]==5 ) || ( pPos_z >= G.zMax && flags[4]==5 ) ) in_local = false; - if ( ! 
in_local ) { + if (pPos_x < G.xMin || pPos_x >= G.xMax) in_local = false; + if ((pPos_y < G.yMin && flags[2] == 5) || (pPos_y >= G.yMax && flags[3] == 5)) in_local = false; + if ((pPos_z < G.zMin && flags[4] == 5) || (pPos_z >= G.zMax && flags[4] == 5)) in_local = false; + if (!in_local) { std::cout << " Adding particle out of local domain to vectors Error:" << std::endl; #ifdef PARTICLE_IDS std::cout << " Particle outside Local domain pID: " << pId << std::endl; #else std::cout << " Particle outside Local domain " << std::endl; #endif - std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; - std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; - std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; + std::cout << " Domain X: " << G.xMin << " " << G.xMax << std::endl; + std::cout << " Domain Y: " << G.yMin << " " << G.yMax << std::endl; + std::cout << " Domain Z: " << G.zMin << " " << G.zMax << std::endl; std::cout << " Particle X: " << pPos_x << std::endl; std::cout << " Particle Y: " << pPos_y << std::endl; std::cout << " Particle Z: " << pPos_z << std::endl; } - //TODO: is it good enough to log the error (but then go ahead and add it to the vector)? - - //Append the particle data to the local data vectors - pos_x.push_back( pPos_x ); - pos_y.push_back( pPos_y ); - pos_z.push_back( pPos_z ); - vel_x.push_back( pVel_x ); - vel_y.push_back( pVel_y ); - vel_z.push_back( pVel_z ); - #ifndef SINGLE_PARTICLE_MASS - mass.push_back( pMass ); - #endif - #ifdef PARTICLE_IDS - partIDs.push_back( Real_to_part_int(pId) ); - #endif - #ifdef PARTICLE_AGE + // TODO: is it good enough to log the error (but then go ahead and add it to + // the vector)? 
+ + // Append the particle data to the local data vectors + pos_x.push_back(pPos_x); + pos_y.push_back(pPos_y); + pos_z.push_back(pPos_z); + vel_x.push_back(pVel_x); + vel_y.push_back(pVel_y); + vel_z.push_back(pVel_z); + #ifndef SINGLE_PARTICLE_MASS + mass.push_back(pMass); + #endif + #ifdef PARTICLE_IDS + partIDs.push_back(Real_to_part_int(pId)); + #endif + #ifdef PARTICLE_AGE age.push_back(pAge); - #endif + #endif grav_x.push_back(0); grav_y.push_back(0); grav_z.push_back(0); - //Add one to the local number of particles + // Add one to the local number of particles n_local += 1; } - - -//After the MPI transfer, unload the particles data from the buffers -void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Real *recv_buffer, part_int_t n_recv, - Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, int buffer_length_z0, int buffer_length_z1, int *flags){ - - //Loop over the data in the recv_buffer, get the data for each particle and append the particle data to the local vecors +// After the MPI transfer, unload the particles data from the buffers +void Particles3D::Unload_Particles_from_Buffer_CPU(int direction, int side, Real *recv_buffer, part_int_t n_recv, + Real *send_buffer_y0, Real *send_buffer_y1, Real *send_buffer_z0, + Real *send_buffer_z1, int buffer_length_y0, int buffer_length_y1, + int buffer_length_z0, int buffer_length_z1, int *flags) +{ + // Loop over the data in the recv_buffer, get the data for each particle and + // append the particle data to the local vecors int offset_buff, offset_extra; part_int_t pId; @@ -402,25 +413,25 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re offset_buff = 0; part_int_t indx; - for ( indx=0; indx= G.domainMax_x ) pPos_x -= ( G.domainMax_x - G.domainMin_x ); + // GLOBAL PERIODIC BOUNDARIES: for the X direction + if (pPos_x < G.domainMin_x) pPos_x += (G.domainMax_x - 
G.domainMin_x); + if (pPos_x >= G.domainMax_x) pPos_x -= (G.domainMax_x - G.domainMin_x); - //If the particle x_position is outside the local domain there was an error - if ( ( pPos_x < G.xMin ) || ( pPos_x >= G.xMax ) ){ - #ifdef PARTICLE_IDS + // If the particle x_position is outside the local domain there was an error + if ((pPos_x < G.xMin) || (pPos_x >= G.xMax)) { + #ifdef PARTICLE_IDS std::cout << "ERROR Particle Transfer out of X domain pID: " << pId << std::endl; - #else + #else std::cout << "ERROR Particle Transfer out of X domain" << std::endl; - #endif + #endif std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl; std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl; std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl; @@ -453,35 +464,39 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re continue; } - // If the y_position at the X_Tansfer (direction=0) is outside the local domain, then the particles is added to the buffer for the Y_Transfer - if (direction == 0 ){ - if ( pPos_y < G.yMin && flags[2]==5 ){ - Add_Particle_To_Buffer( send_buffer_y0, n_in_buffer_y0, buffer_length_y0, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + // If the y_position at the X_Tansfer (direction=0) is outside the local + // domain, then the particles is added to the buffer for the Y_Transfer + if (direction == 0) { + if (pPos_y < G.yMin && flags[2] == 5) { + Add_Particle_To_Buffer(send_buffer_y0, n_in_buffer_y0, buffer_length_y0, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_y0 += 1; n_in_buffer_y0 += 1; continue; } - if ( pPos_y >= G.yMax && flags[3]==5 ){ - Add_Particle_To_Buffer( send_buffer_y1, n_in_buffer_y1, buffer_length_y1, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + if (pPos_y >= G.yMax && flags[3] == 5) { + Add_Particle_To_Buffer(send_buffer_y1, n_in_buffer_y1, buffer_length_y1, pId, pMass, pAge, pPos_x, 
pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_y1 += 1; n_in_buffer_y1 += 1; continue; } } - //PERIODIC BOUNDARIES: for the Y direction - if ( direction == 1 ){ - if ( pPos_y < G.domainMin_y ) pPos_y += ( G.domainMax_y - G.domainMin_y ); - if ( pPos_y >= G.domainMax_y ) pPos_y -= ( G.domainMax_y - G.domainMin_y ); + // PERIODIC BOUNDARIES: for the Y direction + if (direction == 1) { + if (pPos_y < G.domainMin_y) pPos_y += (G.domainMax_y - G.domainMin_y); + if (pPos_y >= G.domainMax_y) pPos_y -= (G.domainMax_y - G.domainMin_y); } - //If the particle y_position is outside the local domain after the X-Transfer, there was an error - if ( (direction==1 || direction==2) && (( pPos_y < G.yMin ) || ( pPos_y >= G.yMax )) ){ - #ifdef PARTICLE_IDS + // If the particle y_position is outside the local domain after the + // X-Transfer, there was an error + if ((direction == 1 || direction == 2) && ((pPos_y < G.yMin) || (pPos_y >= G.yMax))) { + #ifdef PARTICLE_IDS std::cout << "ERROR Particle Transfer out of Y domain pID: " << pId << std::endl; - #else + #else std::cout << "ERROR Particle Transfer out of Y domain" << std::endl; - #endif + #endif std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl; std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl; std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl; @@ -491,35 +506,39 @@ void Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re continue; } - // If the z_position at the X_Tansfer or Y_Transfer is outside the local domain, then the particles is added to the buffer for the Z_Transfer - if (direction !=2 ){ - if ( pPos_z < G.zMin && flags[4]==5 ){ - Add_Particle_To_Buffer( send_buffer_z0, n_in_buffer_z0, buffer_length_z0, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + // If the z_position at the X_Tansfer or Y_Transfer is outside the local + // domain, then the particles is added to the buffer for the Z_Transfer + if (direction != 2) 
{ + if (pPos_z < G.zMin && flags[4] == 5) { + Add_Particle_To_Buffer(send_buffer_z0, n_in_buffer_z0, buffer_length_z0, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_z0 += 1; n_in_buffer_z0 += 1; continue; } - if ( pPos_z >= G.zMax && flags[5]==5 ){ - Add_Particle_To_Buffer( send_buffer_z1, n_in_buffer_z1, buffer_length_z1, pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z ); + if (pPos_z >= G.zMax && flags[5] == 5) { + Add_Particle_To_Buffer(send_buffer_z1, n_in_buffer_z1, buffer_length_z1, pId, pMass, pAge, pPos_x, pPos_y, + pPos_z, pVel_x, pVel_y, pVel_z); n_send_z1 += 1; n_in_buffer_z1 += 1; continue; } } - //GLOBAL PERIODIC BOUNDARIES: for the Z direction - if ( direction == 2 ){ - if ( pPos_z < G.domainMin_z ) pPos_z += ( G.domainMax_z - G.domainMin_z ); - if ( pPos_z >= G.domainMax_z ) pPos_z -= ( G.domainMax_z - G.domainMin_z ); + // GLOBAL PERIODIC BOUNDARIES: for the Z direction + if (direction == 2) { + if (pPos_z < G.domainMin_z) pPos_z += (G.domainMax_z - G.domainMin_z); + if (pPos_z >= G.domainMax_z) pPos_z -= (G.domainMax_z - G.domainMin_z); } - //If the particle z_position is outside the local domain after the X-Transfer and Y-Transfer, there was an error - if ( (direction==2) && (( pPos_z < G.zMin ) || ( pPos_z >= G.zMax )) ){ - #ifdef PARTICLE_IDS + // If the particle z_position is outside the local domain after the + // X-Transfer and Y-Transfer, there was an error + if ((direction == 2) && ((pPos_z < G.zMin) || (pPos_z >= G.zMax))) { + #ifdef PARTICLE_IDS std::cout << "ERROR Particle Transfer out of Z domain pID: " << pId << std::endl; - #else + #else std::cout << "ERROR Particle Transfer out of Z domain" << std::endl; - #endif + #endif std::cout << " posX: " << pPos_x << " velX: " << pVel_x << std::endl; std::cout << " posY: " << pPos_y << " velY: " << pVel_y << std::endl; std::cout << " posZ: " << pPos_z << " velZ: " << pVel_z << std::endl; @@ -529,16 +548,16 @@ void 
Particles_3D::Unload_Particles_from_Buffer_CPU( int direction, int side, Re continue; } - //If the particle doesn't have to be transferred to the y_direction or z_direction, then add the particle date to the local vectors - Add_Particle_To_Vectors( pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z, flags ); + // If the particle doesn't have to be transferred to the y_direction or + // z_direction, then add the particle date to the local vectors + Add_Particle_To_Vectors(pId, pMass, pAge, pPos_x, pPos_y, pPos_z, pVel_x, pVel_y, pVel_z, flags); } } - -//Remove the particles that were transferred outside the local domain -void Particles_3D::Remove_Transfered_Particles( void ){ - - //Get the number of particles to delete +// Remove the particles that were transferred outside the local domain +void Particles3D::Remove_Transfered_Particles(void) +{ + // Get the number of particles to delete part_int_t n_delete = 0; n_delete += out_indxs_vec_x0.size(); n_delete += out_indxs_vec_x1.size(); @@ -548,16 +567,18 @@ void Particles_3D::Remove_Transfered_Particles( void ){ n_delete += out_indxs_vec_z1.size(); // std::cout << "N to delete: " << n_delete << std::endl; - //Concatenate the indices of all the particles that moved into a new vector (delete_indxs_vec) + // Concatenate the indices of all the particles that moved into a new vector + // (delete_indxs_vec) int_vector_t delete_indxs_vec; - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_x0.begin(), out_indxs_vec_x0.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_x1.begin(), out_indxs_vec_x1.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_y0.begin(), out_indxs_vec_y0.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_y1.begin(), out_indxs_vec_y1.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), out_indxs_vec_z0.begin(), out_indxs_vec_z0.end() ); - delete_indxs_vec.insert( delete_indxs_vec.end(), 
out_indxs_vec_z1.begin(), out_indxs_vec_z1.end() ); - - //Clear the vectors that stored the transferred indices for each direction. All these indices are now stored in delete_indxs_vec + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x0.begin(), out_indxs_vec_x0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_x1.begin(), out_indxs_vec_x1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y0.begin(), out_indxs_vec_y0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_y1.begin(), out_indxs_vec_y1.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z0.begin(), out_indxs_vec_z0.end()); + delete_indxs_vec.insert(delete_indxs_vec.end(), out_indxs_vec_z1.begin(), out_indxs_vec_z1.end()); + + // Clear the vectors that stored the transferred indices for each direction. + // All these indices are now stored in delete_indxs_vec out_indxs_vec_x0.clear(); out_indxs_vec_x1.clear(); out_indxs_vec_y0.clear(); @@ -565,63 +586,66 @@ void Particles_3D::Remove_Transfered_Particles( void ){ out_indxs_vec_z0.clear(); out_indxs_vec_z1.clear(); - //Sort the indices that need to be deleted so that the particles are deleted from right to left + // Sort the indices that need to be deleted so that the particles are deleted + // from right to left std::sort(delete_indxs_vec.begin(), delete_indxs_vec.end()); part_int_t indx, pIndx; - for ( indx=0; indx -#include -#include -#include -#include "../utils/gpu.hpp" -#include -#include "../io/io.h" -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../particles/particles_3D.h" -#include "../grid/grid3D.h" -#include "../particles/particles_boundaries_gpu.h" - -#define SCAN_SHARED_SIZE 2*TPB_PARTICLES - - -__global__ void Set_Particles_Boundary_Kernel( int side, part_int_t n_local, Real *pos_dev, Real d_min, Real d_max, Real d_length ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; 
+ #include + #include + #include + #include + + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" + #include "particles_boundaries_gpu.h" + + #define SCAN_SHARED_SIZE (2 * TPB_PARTICLES) + +__global__ void Set_Particles_Boundary_Kernel(int side, part_int_t n_local, Real *pos_dev, Real d_min, Real d_max, + Real d_length) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) { + return; + } Real pos; pos = pos_dev[tid]; - if ( side == 0 ){ - if ( pos < d_min ) pos += d_length; + if (side == 0) { + if (pos < d_min) { + pos += d_length; + } } - if ( side == 1 ){ - if ( pos >= d_max ) pos -= d_length; + if (side == 1) { + if (pos >= d_max) { + pos -= d_length; + } } pos_dev[tid] = pos; - } - -void Grid3D::Set_Particles_Boundary_GPU( int dir, int side ){ - +void Grid3D::Set_Particles_Boundary_GPU(int dir, int side) +{ Real d_min, d_max, L; Real *pos_dev; - if ( dir == 0 ){ - d_min = Particles.G.zMin; - d_max = Particles.G.zMax; + if (dir == 0) { + d_min = Particles.G.zMin; + d_max = Particles.G.zMax; pos_dev = Particles.pos_x_dev; } - if ( dir == 1 ){ - d_min = Particles.G.yMin; - d_max = Particles.G.yMax; + if (dir == 1) { + d_min = Particles.G.yMin; + d_max = Particles.G.yMax; pos_dev = Particles.pos_y_dev; } - if ( dir == 2 ){ - d_min = Particles.G.zMin; - d_max = Particles.G.zMax; + if (dir == 2) { + d_min = Particles.G.zMin; + d_max = Particles.G.zMax; pos_dev = Particles.pos_z_dev; } L = d_max - d_min; // set values for GPU kernels - int grid_size = (Particles.n_local - 1) / TPB_PARTICLES + 1; + int grid_size = (Particles.n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, side, Particles.n_local, 
pos_dev, d_min, d_max, L ); - CudaCheckError(); + hipLaunchKernelGGL(Set_Particles_Boundary_Kernel, dim1dGrid, dim1dBlock, 0, 0, side, Particles.n_local, pos_dev, + d_min, d_max, L); + GPU_Error_Check(); } - // #ifdef MPI_CHOLLA -__global__ void Get_Transfer_Flags_Kernel( part_int_t n_total, int side, Real d_min, Real d_max, Real *pos_d, bool *transfer_flags_d ){ - +__global__ void Get_Transfer_Flags_Kernel(part_int_t n_total, int side, Real d_min, Real d_max, Real *pos_d, + bool *transfer_flags_d) +{ int tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_total ) return; + if (tid >= n_total) { + return; + } - bool transfer = 0; + bool transfer = false; Real pos = pos_d[tid]; - // if ( tid < 1 ) printf( "%f\n", pos); - if ( side == 0 ){ - if ( pos < d_min ) transfer = 1; + if (side == 0 && pos < d_min) { + transfer = true; } - - if ( side == 1 ){ - if ( pos >= d_max ) transfer = 1; + if (side == 1 && pos >= d_max) { + transfer = true; } - // if ( transfer ) printf( "##Thread particles transfer\n"); transfer_flags_d[tid] = transfer; } - -__global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *prefix_sum_block_d ){ - +__global__ void Scan_Kernel(part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *prefix_sum_block_d) +{ __shared__ int data_sh[SCAN_SHARED_SIZE]; int tid_block, block_start; // tid = threadIdx.x + blockIdx.x * blockDim.x; tid_block = threadIdx.x; - block_start = 2*blockIdx.x*blockDim.x; + block_start = 2 * blockIdx.x * blockDim.x; - data_sh[2*tid_block] = block_start + 2*tid_block < n_total ? (int) transfer_flags_d[block_start + 2*tid_block] : 0; - data_sh[2*tid_block+1] = block_start + 2*tid_block+1 < n_total ? (int) transfer_flags_d[block_start + 2*tid_block+1] : 0; + data_sh[2 * tid_block] = + block_start + 2 * tid_block < n_total ? (int)transfer_flags_d[block_start + 2 * tid_block] : 0; + data_sh[2 * tid_block + 1] = + block_start + 2 * tid_block + 1 < n_total ? 
(int)transfer_flags_d[block_start + 2 * tid_block + 1] : 0; __syncthreads(); int offset = 1; - int n = blockDim.x*2; + int n = blockDim.x * 2; int ai, bi; int t; - for (int d = n/2; d>0; d/=2){ - + for (int d = n / 2; d > 0; d /= 2) { __syncthreads(); - if ( tid_block < d ){ - ai = offset*(2*tid_block+1)-1; - bi = offset*(2*tid_block+2)-1; + if (tid_block < d) { + ai = offset * (2 * tid_block + 1) - 1; + bi = offset * (2 * tid_block + 2) - 1; data_sh[bi] += data_sh[ai]; } offset *= 2; } // Clear the last element - if (tid_block == 0) data_sh[n - 1] = 0; + if (tid_block == 0) { + data_sh[n - 1] = 0; + } // Traverse down tree & build scan - for (int d = 1; d < n; d *= 2){ - + for (int d = 1; d < n; d *= 2) { __syncthreads(); - offset /=2; - if (tid_block < d){ - - ai = offset*(2*tid_block+1)-1; - bi = offset*(2*tid_block+2)-1; + offset /= 2; + if (tid_block < d) { + ai = offset * (2 * tid_block + 1) - 1; + bi = offset * (2 * tid_block + 2) - 1; - t = data_sh[ai]; + t = data_sh[ai]; data_sh[ai] = data_sh[bi]; data_sh[bi] += t; } @@ -148,154 +154,190 @@ __global__ void Scan_Kernel( part_int_t n_total, bool *transfer_flags_d, int *pr __syncthreads(); // Write results to device memory - if ( block_start + 2*tid_block < n_total ) prefix_sum_d[block_start + 2*tid_block] = data_sh[2*tid_block]; - if ( block_start + 2*tid_block+1 < n_total) prefix_sum_d[block_start + 2*tid_block+1] = data_sh[2*tid_block+1]; + if (block_start + 2 * tid_block < n_total) { + prefix_sum_d[block_start + 2 * tid_block] = data_sh[2 * tid_block]; + } + if (block_start + 2 * tid_block + 1 < n_total) { + prefix_sum_d[block_start + 2 * tid_block + 1] = data_sh[2 * tid_block + 1]; + } // Write the block sum - int last_flag_block = (int) transfer_flags_d[block_start + 2*(blockDim.x-1)+1]; - if (tid_block == 0) prefix_sum_block_d[blockIdx.x] = data_sh[2*(blockDim.x-1)+1] + last_flag_block; + int last_flag_block = (int)transfer_flags_d[block_start + 2 * (blockDim.x - 1) + 1]; + if (tid_block == 0) { + 
prefix_sum_block_d[blockIdx.x] = data_sh[2 * (blockDim.x - 1) + 1] + last_flag_block; + } } - -__global__ void Prefix_Sum_Blocks_Kernel( int n_partial, int *prefix_sum_block_d ){ - - int tid_block, val, start_index, n_threads; +__global__ void Prefix_Sum_Blocks_Kernel(int n_partial, int *prefix_sum_block_d) +{ + int tid_block, val, start_index, n_threads; tid_block = threadIdx.x; n_threads = blockDim.x; __shared__ int data_sh[TPB_PARTICLES]; - - int sum = 0; - int n = 0; + int sum = 0; + int n = 0; start_index = n * n_threads; - while( start_index < n_partial ){ - data_sh[tid_block] = start_index+tid_block < n_partial ? prefix_sum_block_d[start_index+tid_block] : 0; + while (start_index < n_partial) { + data_sh[tid_block] = start_index + tid_block < n_partial ? prefix_sum_block_d[start_index + tid_block] : 0; __syncthreads(); - - if (tid_block == 0){ - for ( int i=0; i 0 ) printf( "##Thread transfer: %d\n", n_transfer_d[0]); +__global__ void Get_N_Transfer_Particles_Kernel(part_int_t n_total, int *n_transfer_d, bool *transfer_flags_d, + int *prefix_sum_d) +{ + n_transfer_d[0] = prefix_sum_d[n_total - 1] + (int)transfer_flags_d[n_total - 1]; + // if ( n_transfer_d[0] > 0 ) printf( "##Thread transfer: %d\n", + // n_transfer_d[0]); } -__global__ void Get_Transfer_Indices_Kernel( part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, int *transfer_indices_d ){ - +__global__ void Get_Transfer_Indices_Kernel(part_int_t n_total, bool *transfer_flags_d, int *prefix_sum_d, + int *transfer_indices_d) +{ int tid, transfer_index; - tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_total ) return; + tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= n_total) { + return; + } transfer_index = prefix_sum_d[tid]; - - if ( transfer_index < 0 || transfer_index >= n_total ){ - printf( "#### PARTICLE TRANSFER ERROR: transfer index outside domain: %d \n", transfer_index ); + + if (transfer_index < 0 || transfer_index >= n_total) { + printf("#### PARTICLE 
TRANSFER ERROR: transfer index outside domain: %d \n", transfer_index); return; } - - if ( transfer_flags_d[tid] ) transfer_indices_d[transfer_index] = tid; + if (transfer_flags_d[tid]) { + transfer_indices_d[transfer_index] = tid; + } } - -__global__ void Select_Indices_to_Replace_Tranfered_Kernel( part_int_t n_total, int n_transfer, bool *transfer_flags_d, int *prefix_sum_d, int *replace_indices_d ){ - +__global__ void Select_Indices_to_Replace_Transfered_Kernel(part_int_t n_total, int n_transfer, bool *transfer_flags_d, + int *prefix_sum_d, int *replace_indices_d) +{ int tid, tid_inv; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_total ) return; + if (tid >= n_total) { + return; + } tid_inv = n_total - tid - 1; bool transfer_flag = transfer_flags_d[tid]; - if ( transfer_flag ) return; + if (transfer_flag) { + return; + } int prefix_sum_inv, replace_id; prefix_sum_inv = n_transfer - prefix_sum_d[tid]; - replace_id = tid_inv - prefix_sum_inv; - - - if ( replace_id < 0 || replace_id >= n_total ){ - printf( "#### PARTICLE TRANSFER ERROR: replace index outside domain: %d \n", replace_id ); + replace_id = tid_inv - prefix_sum_inv; + + if (replace_id < 0 || replace_id >= n_total) { + printf("#### PARTICLE TRANSFER ERROR: replace index outside domain: %d \n", replace_id); return; - } + } replace_indices_d[replace_id] = tid; - } - - -__global__ void Replace_Transfered_Particles_Kernel( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ - +template +__global__ void Replace_Transfered_Particles_Kernel(int n_transfer, T *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) { + return; + } int dst_id, src_id; dst_id = transfer_indices_d[tid]; src_id = replace_indices_d[tid]; - if ( dst_id < src_id ){ - if (print_replace) printf("Replacing: %f \n", field_d[dst_id] ); + 
if (dst_id < src_id) { + if (print_replace) { + printf("Replacing: %f \n", field_d[dst_id] * 1.0); + } field_d[dst_id] = field_d[src_id]; } - } - -void Replace_Transfered_Particles_GPU_function( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ){ +void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace) +{ int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, transfer_indices_d, replace_indices_d, print_replace ); - CudaCheckError(); - + hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, + transfer_indices_d, replace_indices_d, print_replace); + GPU_Error_Check(); } +void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace) +{ + int grid_size; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + // number of blocks per 1D grid + dim3 dim1dGrid(grid_size, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB_PARTICLES, 1, 1); + + hipLaunchKernelGGL(Replace_Transfered_Particles_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_d, + transfer_indices_d, replace_indices_d, print_replace); + GPU_Error_Check(); +} -part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d ){ +part_int_t 
Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax, + Real *pos_d, int *n_transfer_d, int *n_transfer_h, + bool *transfer_flags_d, int *transfer_indices_d, + int *replace_indices_d, int *transfer_prefix_sum_d, + int *transfer_prefix_sum_blocks_d) +{ // set values for GPU kernels int grid_size, grid_size_half; - grid_size = (n_local - 1) / TPB_PARTICLES + 1; - grid_size_half = ( (n_local-1)/2 ) / TPB_PARTICLES + 1; + grid_size = (n_local - 1) / TPB_PARTICLES + 1; + grid_size_half = ((n_local - 1) / 2) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); dim3 dim1dGrid_half(grid_size_half, 1, 1); @@ -310,122 +352,211 @@ part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int si return 0; } - hipLaunchKernelGGL( Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, side, domainMin, domainMax, pos_d, transfer_flags_d); - CudaCheckError(); - - hipLaunchKernelGGL( Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d ); - CudaCheckError(); - - hipLaunchKernelGGL( Prefix_Sum_Blocks_Kernel, 1, dim1dBlock , 0, 0, grid_size_half, transfer_prefix_sum_blocks_d ); - CudaCheckError(); - - hipLaunchKernelGGL( Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_prefix_sum_d, transfer_prefix_sum_blocks_d ); - CudaCheckError(); - - hipLaunchKernelGGL( Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, n_transfer_d, transfer_flags_d, transfer_prefix_sum_d ); - CudaCheckError(); - - CudaSafeCall( cudaMemcpy( n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost) ); - CudaCheckError(); - - hipLaunchKernelGGL( Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local , transfer_flags_d, transfer_prefix_sum_d, transfer_indices_d ); - CudaCheckError(); - - hipLaunchKernelGGL( Select_Indices_to_Replace_Tranfered_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, 
n_transfer_h[0], transfer_flags_d, transfer_prefix_sum_d, replace_indices_d ); - CudaCheckError(); + hipLaunchKernelGGL(Get_Transfer_Flags_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, side, domainMin, domainMax, pos_d, + transfer_flags_d); + GPU_Error_Check(); - // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]); - return n_transfer_h[0]; + hipLaunchKernelGGL(Scan_Kernel, dim1dGrid_half, dim1dBlock, 0, 0, n_local, transfer_flags_d, transfer_prefix_sum_d, + transfer_prefix_sum_blocks_d); + GPU_Error_Check(); -} + hipLaunchKernelGGL(Prefix_Sum_Blocks_Kernel, 1, dim1dBlock, 0, 0, grid_size_half, transfer_prefix_sum_blocks_d); + GPU_Error_Check(); + + hipLaunchKernelGGL(Sum_Blocks_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_prefix_sum_d, + transfer_prefix_sum_blocks_d); + GPU_Error_Check(); + + hipLaunchKernelGGL(Get_N_Transfer_Particles_Kernel, 1, 1, 0, 0, n_local, n_transfer_d, transfer_flags_d, + transfer_prefix_sum_d); + GPU_Error_Check(); + GPU_Error_Check(cudaMemcpy(n_transfer_h, n_transfer_d, sizeof(int), cudaMemcpyDeviceToHost)); + GPU_Error_Check(); + hipLaunchKernelGGL(Get_Transfer_Indices_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, transfer_flags_d, + transfer_prefix_sum_d, transfer_indices_d); + GPU_Error_Check(); -__global__ void Load_Transfered_Particles_to_Buffer_Kernel( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ + hipLaunchKernelGGL(Select_Indices_to_Replace_Transfered_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer_h[0], + transfer_flags_d, transfer_prefix_sum_d, replace_indices_d); + GPU_Error_Check(); + // if ( n_transfer_h[0] > 0 )printf( "N transfer: %d\n", n_transfer_h[0]); + return n_transfer_h[0]; +} + +__global__ void Load_Transfered_Particles_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, int *transfer_indices_d, Real 
*send_buffer_d, + Real domainMin, Real domainMax, int boundary_type) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) { + return; + } int src_id, dst_id; Real field_val; - src_id = transfer_indices_d[tid]; - dst_id = tid * n_fields_to_transfer + field_id; + src_id = transfer_indices_d[tid]; + dst_id = tid * n_fields_to_transfer + field_id; field_val = field_d[src_id]; // Set global periodic boundary conditions - if ( boundary_type == 1 && field_val < domainMin ) field_val += ( domainMax - domainMin ); - if ( boundary_type == 1 && field_val >= domainMax ) field_val -= ( domainMax - domainMin ); + if (boundary_type == 1 && field_val < domainMin) { + field_val += (domainMax - domainMin); + } + if (boundary_type == 1 && field_val >= domainMax) { + field_val -= (domainMax - domainMin); + } send_buffer_d[dst_id] = field_val; - } -void Load_Particles_to_Transfer_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ){ - +void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type) +{ // set values for GPU kernels int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_transfer, field_id, n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, boundary_type ); - CudaCheckError(); - + hipLaunchKernelGGL(Load_Transfered_Particles_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id, + 
n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, + boundary_type); + GPU_Error_Check(); } +__global__ void Load_Transfered_Particles_Ints_to_Buffer_Kernel(int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, int *transfer_indices_d, + Real *send_buffer_d, Real domainMin, Real domainMax, + int boundary_type) +{ + int tid; + tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= n_transfer) { + return; + } -void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){ - - int transfer_size; - transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall( cudaMemcpy( buffer_h, buffer_d, transfer_size*sizeof(Real), cudaMemcpyDeviceToHost) ); - CudaCheckError(); + int src_id, dst_id; + part_int_t field_val; + src_id = transfer_indices_d[tid]; + dst_id = tid * n_fields_to_transfer + field_id; + field_val = field_d[src_id]; + // Set global periodic boundary conditions + if (boundary_type == 1 && field_val < domainMin) { + field_val += (domainMax - domainMin); + } + if (boundary_type == 1 && field_val >= domainMax) { + field_val -= (domainMax - domainMin); + } + send_buffer_d[dst_id] = __longlong_as_double(field_val); } +void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, + Real domainMin, Real domainMax, int boundary_type) +{ + // set values for GPU kernels + int grid_size; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + // number of blocks per 1D grid + dim3 dim1dGrid(grid_size, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB_PARTICLES, 1, 1); + hipLaunchKernelGGL(Load_Transfered_Particles_Ints_to_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_transfer, field_id, + n_fields_to_transfer, field_d, transfer_indices_d, send_buffer_d, domainMin, domainMax, + boundary_type); + GPU_Error_Check(); +} -void 
Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ){ - + #ifdef MPI_CHOLLA +void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d) +{ int transfer_size; transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; - CudaSafeCall( cudaMemcpy( buffer_d, buffer_h, transfer_size*sizeof(Real), cudaMemcpyHostToDevice) ); - CudaCheckError(); - + GPU_Error_Check(cudaMemcpy(buffer_h, buffer_d, transfer_size * sizeof(Real), cudaMemcpyDeviceToHost)); + GPU_Error_Check(); } +void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d) +{ + int transfer_size; + transfer_size = n_transfer * N_DATA_PER_PARTICLE_TRANSFER; + GPU_Error_Check(cudaMemcpy(buffer_d, buffer_h, transfer_size * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(); +} + #endif // MPI_CHOLLA -__global__ void Unload_Transfered_Particles_from_Buffer_Kernel( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ){ - +__global__ void Unload_Transfered_Particles_from_Buffer_Kernel(int n_local, int n_transfer, int field_id, + int n_fields_to_transfer, Real *field_d, + Real *recv_buffer_d) +{ int tid; tid = threadIdx.x + blockIdx.x * blockDim.x; - if ( tid >= n_transfer ) return; + if (tid >= n_transfer) { + return; + } int src_id, dst_id; - src_id = tid * n_fields_to_transfer + field_id; - dst_id = n_local + tid; + src_id = tid * n_fields_to_transfer + field_id; + dst_id = n_local + tid; field_d[dst_id] = recv_buffer_d[src_id]; +} +void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, Real *recv_buffer_d) +{ + // set values for GPU kernels + int grid_size; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + // number of blocks per 1D grid + dim3 dim1dGrid(grid_size, 1, 1); + // number of threads per 1D block + dim3 dim1dBlock(TPB_PARTICLES, 1, 1); + + 
hipLaunchKernelGGL(Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, n_transfer, + field_id, n_fields_to_transfer, field_d, recv_buffer_d); + GPU_Error_Check(); } -void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ){ +__global__ void Unload_Transfered_Particles_Int_from_Buffer_Kernel(int n_local, int n_transfer, int field_id, + int n_fields_to_transfer, part_int_t *field_d, + Real *recv_buffer_d) +{ + int tid; + tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= n_transfer) { + return; + } + int src_id, dst_id; + src_id = tid * n_fields_to_transfer + field_id; + dst_id = n_local + tid; + field_d[dst_id] = __double_as_longlong(recv_buffer_d[src_id]); +} + +void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, Real *recv_buffer_d) +{ // set values for GPU kernels int grid_size; - grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; + grid_size = (n_transfer - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(grid_size, 1, 1); // number of threads per 1D block dim3 dim1dBlock(TPB_PARTICLES, 1, 1); - hipLaunchKernelGGL( Unload_Transfered_Particles_from_Buffer_Kernel, dim1dGrid, dim1dBlock , 0, 0, n_local, n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d ); - CudaCheckError(); - + hipLaunchKernelGGL(Unload_Transfered_Particles_Int_from_Buffer_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, + n_transfer, field_id, n_fields_to_transfer, field_d, recv_buffer_d); + GPU_Error_Check(); } // #endif//MPI_CHOLLA - -#endif //PARTICLES +#endif // PARTICLES diff --git a/src/particles/particles_boundaries_gpu.h b/src/particles/particles_boundaries_gpu.h index d10fb3428..638102ad2 100644 --- a/src/particles/particles_boundaries_gpu.h +++ b/src/particles/particles_boundaries_gpu.h @@ -1,21 +1,34 @@ #if defined(PARTICLES) 
&& defined(PARTICLES_GPU) -#ifndef PARTICLES_BOUNDARIES_H -#define PARTICLES_BOUNDARIES_H - -part_int_t Select_Particles_to_Transfer_GPU_function( part_int_t n_local, int side, Real domainMin, Real domainMax, Real *pos_d, int *n_transfer_d, int *n_transfer_h, bool *transfer_flags_d, int *transfer_indices_d, int *replace_indices_d, int *transfer_prefix_sum_d, int *transfer_prefix_sum_blocks_d ); - -void Load_Particles_to_Transfer_GPU_function( int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, int *transfer_indices_d, Real *send_buffer_d, Real domainMin, Real domainMax, int boundary_type ); - -void Replace_Transfered_Particles_GPU_function( int n_transfer, Real *field_d, int *transfer_indices_d, int *replace_indices_d, bool print_replace ); - -void Copy_Particles_GPU_Buffer_to_Host_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ); - -void Copy_Particles_Host_Buffer_to_GPU_Buffer( int n_transfer, Real *buffer_h, Real *buffer_d ); - -void Unload_Particles_to_Transfer_GPU_function( int n_local, int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, Real *recv_buffer_d ); - - - -#endif //PARTICLES_H -#endif //PARTICLES \ No newline at end of file + #ifndef PARTICLES_BOUNDARIES_H + #define PARTICLES_BOUNDARIES_H + +part_int_t Select_Particles_to_Transfer_GPU_function(part_int_t n_local, int side, Real domainMin, Real domainMax, + Real *pos_d, int *n_transfer_d, int *n_transfer_h, + bool *transfer_flags_d, int *transfer_indices_d, + int *replace_indices_d, int *transfer_prefix_sum_d, + int *transfer_prefix_sum_blocks_d); + +void Load_Particles_to_Transfer_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, Real *field_d, + int *transfer_indices_d, Real *send_buffer_d, Real domainMin, + Real domainMax, int boundary_type); +void Load_Particles_to_Transfer_Int_GPU_function(int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, int *transfer_indices_d, Real *send_buffer_d, + Real domainMin, 
Real domainMax, int boundary_type); + +void Replace_Transfered_Particles_GPU_function(int n_transfer, Real *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace); +void Replace_Transfered_Particles_Int_GPU_function(int n_transfer, part_int_t *field_d, int *transfer_indices_d, + int *replace_indices_d, bool print_replace); + +void Copy_Particles_GPU_Buffer_to_Host_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d); + +void Copy_Particles_Host_Buffer_to_GPU_Buffer(int n_transfer, Real *buffer_h, Real *buffer_d); + +void Unload_Particles_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + Real *field_d, Real *recv_buffer_d); +void Unload_Particles_Int_to_Transfer_GPU_function(int n_local, int n_transfer, int field_id, int n_fields_to_transfer, + part_int_t *field_d, Real *recv_buffer_d); + + #endif // PARTICLES_H +#endif // PARTICLES \ No newline at end of file diff --git a/src/particles/particles_dynamics.cpp b/src/particles/particles_dynamics.cpp index de00b1426..39aeba6c7 100644 --- a/src/particles/particles_dynamics.cpp +++ b/src/particles/particles_dynamics.cpp @@ -1,58 +1,55 @@ #ifdef PARTICLES + #include + #include -#include -#include -#include "math.h" -#include -#include "../global/global.h" -#include "../grid/grid3D.h" -#include "../particles/particles_3D.h" -#include "../io/io.h" + #include -#ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" -#endif + #include "../global/global.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "math.h" + #include "particles_3D.h" + #ifdef PARALLEL_OMP + #include "../utils/parallel_omp.h" + #endif -//Compute the delta_t for the particles -Real Grid3D::Calc_Particles_dt( ){ - +// Compute the delta_t for the particles +Real Grid3D::Calc_Particles_dt() +{ Real dt_particles; #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - dt_particles = Calc_Particles_dt_function( 0, Particles.n_local ); - #else + #ifndef PARALLEL_OMP + dt_particles = 
Calc_Particles_dt_function(0, Particles.n_local); + #else dt_particles = 1e100; Real dt_particles_all[N_OMP_THREADS]; - #pragma omp parallel num_threads( N_OMP_THREADS ) + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - dt_particles_all[omp_id] = Calc_Particles_dt_function( p_start, p_end ); + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); + dt_particles_all[omp_id] = Calc_Particles_dt_function(p_start, p_end); } - for ( int i=0; i Particles.G.size_blocks_array ) chprintf(" Error: particles dt_array too small\n"); + int ngrid = (Particles.n_local - 1) / TPB_PARTICLES + 1; + if (ngrid > Particles.G.size_blocks_array) { + chprintf(" Error: particles dt_array too small\n"); + } Real max_dti; - max_dti = Particles.Calc_Particles_dt_GPU_function( ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.G.dti_array_host, Particles.G.dti_array_dev ); + max_dti = Particles.Calc_Particles_dt_GPU_function( + ngrid, Particles.n_local, Particles.G.dx, Particles.G.dy, Particles.G.dz, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.G.dti_array_host, Particles.G.dti_array_dev); Real dt_min; - #ifdef COSMOLOGY + #ifdef COSMOLOGY Real scale_factor, vel_factor, da_min; - scale_factor = 1 / ( Cosmo.current_a * Cosmo.Get_Hubble_Parameter( Cosmo.current_a) ) * Cosmo.cosmo_h; - vel_factor = Cosmo.current_a / scale_factor; - da_min = vel_factor / max_dti; - dt_min = Cosmo.Get_dt_from_da( da_min ); - #else + scale_factor = 1 / (Cosmo.current_a * Cosmo.Get_Hubble_Parameter(Cosmo.current_a)) * Cosmo.cosmo_h; + vel_factor = Cosmo.current_a / scale_factor; + da_min = vel_factor / max_dti; + dt_min 
= Cosmo.Get_dt_from_da(da_min); + #else dt_min = 1 / max_dti; - #endif - - return Particles.C_cfl*dt_min; + #endif + return Particles.C_cfl * dt_min; } -//Update positions and velocities (step 1 of KDK scheme ) in the GPU -void Grid3D::Advance_Particles_KDK_Step1_GPU(){ - - #ifdef COSMOLOGY - Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function( Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K ); - #else - Particles.Advance_Particles_KDK_Step1_GPU_function( Particles.n_local, Particles.dt, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev ); - #endif - - +// Update positions and velocities (step 1 of KDK scheme ) in the GPU +void Grid3D::Advance_Particles_KDK_Step1_GPU() +{ + #ifdef COSMOLOGY + Particles.Advance_Particles_KDK_Step1_Cosmo_GPU_function( + Particles.n_local, Cosmo.delta_a, Particles.pos_x_dev, Particles.pos_y_dev, Particles.pos_z_dev, + Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, + Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); + #else + Particles.Advance_Particles_KDK_Step1_GPU_function(Particles.n_local, Particles.dt, Particles.pos_x_dev, + Particles.pos_y_dev, Particles.pos_z_dev, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, + Particles.grav_y_dev, Particles.grav_z_dev); + #endif } -//Update velocities (step 2 of KDK scheme ) in the GPU -void Grid3D::Advance_Particles_KDK_Step2_GPU(){ - - #ifdef COSMOLOGY - Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function( Particles.n_local, 
Cosmo.delta_a, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K ); - #else - Particles.Advance_Particles_KDK_Step2_GPU_function( Particles.n_local, Particles.dt, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev ); - #endif - - +// Update velocities (step 2 of KDK scheme ) in the GPU +void Grid3D::Advance_Particles_KDK_Step2_GPU() +{ + #ifdef COSMOLOGY + Particles.Advance_Particles_KDK_Step2_Cosmo_GPU_function( + Particles.n_local, Cosmo.delta_a, Particles.vel_x_dev, Particles.vel_y_dev, Particles.vel_z_dev, + Particles.grav_x_dev, Particles.grav_y_dev, Particles.grav_z_dev, Cosmo.current_a, Cosmo.H0, Cosmo.cosmo_h, + Cosmo.Omega_M, Cosmo.Omega_L, Cosmo.Omega_K); + #else + Particles.Advance_Particles_KDK_Step2_GPU_function(Particles.n_local, Particles.dt, Particles.vel_x_dev, + Particles.vel_y_dev, Particles.vel_z_dev, Particles.grav_x_dev, + Particles.grav_y_dev, Particles.grav_z_dev); + #endif } + #endif // PARTICLES_GPU -#endif //PARTICLES_GPU - - - - -#ifdef PARTICLES_CPU + #ifdef PARTICLES_CPU -//Loop over the particles anf compute dt_min -Real Grid3D::Calc_Particles_dt_function( part_int_t p_start, part_int_t p_end ){ +// Loop over the particles anf compute dt_min +Real Grid3D::Calc_Particles_dt_function(part_int_t p_start, part_int_t p_end) +{ part_int_t pID; Real dt, dt_min, vel; dt_min = 1e100; - for ( pID=p_start; pID 0){ - dt = Particles.G.dx / vel; - dt_min = std::min( dt_min, dt); + if (vel > 0) { + dt = Particles.G.dx / vel; + dt_min = std::min(dt_min, dt); } vel = fabs(Particles.vel_y[pID]); - if ( vel > 0){ - dt = Particles.G.dy / vel; - dt_min = std::min( dt_min, dt); + if (vel > 0) { + dt = Particles.G.dy / vel; + dt_min = std::min(dt_min, dt); } vel = fabs(Particles.vel_z[pID]); - if ( vel > 0){ - dt = 
Particles.G.dz / vel; - dt_min = std::min( dt_min, dt); + if (vel > 0) { + dt = Particles.G.dz / vel; + dt_min = std::min(dt_min, dt); } } return Particles.C_cfl * dt_min; } -#endif //PARTICLES_CPU - -//Update the particles positions and velocities -void Grid3D::Advance_Particles( int N_step ){ + #endif // PARTICLES_CPU +// Update the particles positions and velocities +void Grid3D::Advance_Particles(int N_step) +{ + GPU_Error_Check(); #ifdef CPU_TIME - if ( N_step == 1) Timer.Advance_Part_1.Start(); - if ( N_step == 2) Timer.Advance_Part_2.Start(); + if (N_step == 1) { + Timer.Advance_Part_1.Start(); + } + if (N_step == 2) { + Timer.Advance_Part_2.Start(); + } #endif #ifdef PARTICLES_KDK - //Update the velocities by 0.5*delta_t and update the positions by delta_t - if ( N_step == 1 ) Advance_Particles_KDK_Step1(); + // Update the velocities by 0.5*delta_t and update the positions by delta_t + if (N_step == 1) { + Advance_Particles_KDK_Step1(); + } #endif - if ( N_step == 2 ){ - //Compute the particles accelerations at the new positions + if (N_step == 2) { + // Compute the particles accelerations at the new positions Get_Particles_Acceleration(); - #ifdef PARTICLES_KDK - //Advance the particles velocities by the remaining 0.5*delta_t + #ifdef PARTICLES_KDK + // Advance the particles velocities by the remaining 0.5*delta_t Advance_Particles_KDK_Step2(); - #endif - + #endif } #ifdef CPU_TIME - if ( N_step == 1) Timer.Advance_Part_1.End(); - if ( N_step == 2) Timer.Advance_Part_2.End(); + if (N_step == 1) { + Timer.Advance_Part_1.End(); + } + if (N_step == 2) { + Timer.Advance_Part_2.End(); + } #endif - + GPU_Error_Check(); } // Get the accteleration for all the particles -void Grid3D::Get_Particles_Acceleration(){ - - //First compute the gravitational field at the center of the grid cells +void Grid3D::Get_Particles_Acceleration() +{ + // First compute the gravitational field at the center of the grid cells Get_Gravity_Field_Particles(); - //Then Interpolate the 
gravitational field from the centers of the cells to the positions of the particles + // Then Interpolate the gravitational field from the centers of the cells to + // the positions of the particles Get_Gravity_CIC(); } -//Update positions and velocities (step 1 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step1( ){ - +// Update positions and velocities (step 1 of KDK scheme ) +void Grid3D::Advance_Particles_KDK_Step1() +{ #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step1_function( 0, Particles.n_local ); - #else - Advance_Particles_KDK_Step1_function( 0, Particles.n_local ); - #endif//COSMOLOGY - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifndef PARALLEL_OMP + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step1_function(0, Particles.n_local); + #else + Advance_Particles_KDK_Step1_function(0, Particles.n_local); + #endif // COSMOLOGY + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step1_function( p_start, p_end ); - #else - Advance_Particles_KDK_Step1_function( p_start, p_end ); - #endif//COSMOLOGY + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step1_function(p_start, p_end); + #else + Advance_Particles_KDK_Step1_function(p_start, p_end); + #endif // COSMOLOGY } - #endif //PARALLEL_OMP - #endif //PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU Advance_Particles_KDK_Step1_GPU(); - #endif //PARTICLES_GPU - + #endif // PARTICLES_GPU } -//Update velocities (step 2 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step2( ){ - +// Update velocities (step 2 of KDK scheme ) 
+void Grid3D::Advance_Particles_KDK_Step2() +{ #ifdef PARTICLES_CPU - #ifndef PARALLEL_OMP - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step2_function( 0, Particles.n_local ); - #else - Advance_Particles_KDK_Step2_function( 0, Particles.n_local ); - #endif//COSMOLOGY - #else - #pragma omp parallel num_threads( N_OMP_THREADS ) + #ifndef PARALLEL_OMP + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step2_function(0, Particles.n_local); + #else + Advance_Particles_KDK_Step2_function(0, Particles.n_local); + #endif // COSMOLOGY + #else + #pragma omp parallel num_threads(N_OMP_THREADS) { int omp_id, n_omp_procs; part_int_t p_start, p_end; - omp_id = omp_get_thread_num(); + omp_id = omp_get_thread_num(); n_omp_procs = omp_get_num_threads(); - Get_OMP_Particles_Indxs( Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end ); - #ifdef COSMOLOGY - Advance_Particles_KDK_Cosmo_Step2_function( p_start, p_end ); - #else - Advance_Particles_KDK_Step2_function( p_start, p_end ); - #endif//COSMOLOGY + Get_OMP_Particles_Indxs(Particles.n_local, N_OMP_THREADS, omp_id, &p_start, &p_end); + #ifdef COSMOLOGY + Advance_Particles_KDK_Cosmo_Step2_function(p_start, p_end); + #else + Advance_Particles_KDK_Step2_function(p_start, p_end); + #endif // COSMOLOGY } - #endif //PARALLEL_OMP - #endif //PARTICLES_CPU + #endif // PARALLEL_OMP + #endif // PARTICLES_CPU #ifdef PARTICLES_GPU Advance_Particles_KDK_Step2_GPU(); - #endif //PARTICLES_GPU - + #endif // PARTICLES_GPU } -#ifdef PARTICLES_CPU -//Update positions and velocities (step 1 of KDK scheme ) -void Grid3D::Advance_Particles_KDK_Step1_function( part_int_t p_start, part_int_t p_end ){ - + #ifdef PARTICLES_CPU +// Update positions and velocities (step 1 of KDK scheme ) +void Grid3D::Advance_Particles_KDK_Step1_function(part_int_t p_start, part_int_t p_end) +{ part_int_t pID; Real dt = Particles.dt; // Advance velocities by half a step - for ( pID=p_start; pID -#include -#include -#include -#include "../utils/gpu.hpp" -#include 
"../global/global.h" -#include "../global/global_cuda.h" -#include "../grid/grid3D.h" -#include "../io/io.h" -#include "../particles/particles_3D.h" - -#ifdef COSMOLOGY -#include "../cosmology/cosmology.h" + #include + #include + #include + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../grid/grid3D.h" + #include "../io/io.h" + #include "../utils/gpu.hpp" + #include "particles_3D.h" + + #ifdef COSMOLOGY + #include "../cosmology/cosmology.h" // #include "../cosmology/cosmology_functions_gpu.h" -// FUTURE FIX: The Hubble function was defined here because I couldn't get it form other file, tried -dc flag when compiling buu paris broke. -__device__ Real Get_Hubble_Parameter_dev( Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K ){ - Real a2 = a * a; - Real a3 = a2 * a; - Real factor = ( Omega_M/a3 + Omega_K/a2 + Omega_L ); +// FUTURE FIX: The Hubble function was defined here because I couldn't get it +// form other file, tried -dc flag when compiling buu paris broke. 
+__device__ Real Get_Hubble_Parameter_dev(Real a, Real H0, Real Omega_M, Real Omega_L, Real Omega_K) +{ + Real a2 = a * a; + Real a3 = a2 * a; + Real factor = (Omega_M / a3 + Omega_K / a2 + Omega_L); return H0 * sqrt(factor); - } -#endif - + #endif - - - -__global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *dti_array ) +__global__ void Calc_Particles_dti_Kernel(part_int_t n_local, Real dx, Real dy, Real dz, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *dti_array) { __shared__ Real max_dti[TPB_PARTICLES]; @@ -37,7 +36,7 @@ __global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, int tid; // get a global thread ID - id = blockIdx.x * blockDim.x + threadIdx.x ; + id = blockIdx.x * blockDim.x + threadIdx.x; // and a thread id within the block tid = threadIdx.x; @@ -50,39 +49,38 @@ __global__ void Calc_Particles_dti_Kernel( part_int_t n_local, Real dx, Real dy, // if( tid == 0 ) printf("%f %f %f \n", dx, dy, dz ); // threads corresponding to real cells do the calculation - if (id < n_local ){ + if (id < n_local) { // every thread collects the variables it needs from global memory - vx = vel_x_dev[id]; - vy = vel_y_dev[id]; - vz = vel_z_dev[id]; - max_dti[tid] = fmax( fabs(vx)/dx, fabs(vy)/dy); - max_dti[tid] = fmax( max_dti[tid], fabs(vz)/dz); - max_dti[tid] = fmax( max_dti[tid], 0.0); + vx = vel_x_dev[id]; + vy = vel_y_dev[id]; + vz = vel_z_dev[id]; + max_dti[tid] = fmax(fabs(vx) / dx, fabs(vy) / dy); + max_dti[tid] = fmax(max_dti[tid], fabs(vz) / dz); + max_dti[tid] = fmax(max_dti[tid], 0.0); } __syncthreads(); - // do the reduction in shared memory (find the max inverse timestep in the block) - for (unsigned int s=1; s= n_local) return; +__global__ void Advance_Particles_KDK_Step1_Kernel(part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, + Real *grav_x_dev, 
Real *grav_y_dev, Real *grav_z_dev) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) { + return; + } // Advance velocities by half a step vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid]; vel_y_dev[tid] += 0.5 * dt * grav_y_dev[tid]; vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid]; - //Advance Positions using advanced velocities + // Advance Positions using advanced velocities pos_x_dev[tid] += dt * vel_x_dev[tid]; pos_y_dev[tid] += dt * vel_y_dev[tid]; pos_z_dev[tid] += dt * vel_z_dev[tid]; } - -__global__ void Advance_Particles_KDK_Step2_Kernel( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +__global__ void Advance_Particles_KDK_Step2_Kernel(part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) { + return; + } // Advance velocities by the second half a step vel_x_dev[tid] += 0.5 * dt * grav_x_dev[tid]; vel_y_dev[tid] += 0.5 * dt * grav_y_dev[tid]; vel_z_dev[tid] += 0.5 * dt * grav_z_dev[tid]; - } - -void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, Real dt, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){ - +void Particles3D::Advance_Particles_KDK_Step1_GPU_function(part_int_t n_local, Real dt, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // 
number of threads per 1D block @@ -157,16 +158,18 @@ void Particles_3D::Advance_Particles_KDK_Step1_GPU_function( part_int_t n_local, // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev ); - CudaCheckError(); + hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, pos_x_dev, + pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); + GPU_Error_Check(); } } - -void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, Real dt, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev ){ - +void Particles3D::Advance_Particles_KDK_Step2_GPU_function(part_int_t n_local, Real dt, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -174,35 +177,39 @@ void Particles_3D::Advance_Particles_KDK_Step2_GPU_function( part_int_t n_local, // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev ); - CudaCheckError(); + hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, dt, vel_x_dev, + vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev); + GPU_Error_Check(); } } + #ifdef COSMOLOGY -#ifdef COSMOLOGY - - -__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, 
Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +__global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel(part_int_t n_local, Real da, Real *pos_x_dev, Real *pos_y_dev, + Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, + Real Omega_M, Real Omega_L, Real Omega_K) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) { + return; + } Real vel_x, vel_y, vel_z; vel_x = vel_x_dev[tid]; vel_y = vel_y_dev[tid]; vel_z = vel_z_dev[tid]; - Real da_half, a_half, H, H_half, dt, dt_half; - da_half = da/2; - a_half = current_a + da_half; + da_half = da / 2; + a_half = current_a + da_half; - H = Get_Hubble_Parameter_dev( current_a, H0, Omega_M, Omega_L, Omega_K ); - H_half = Get_Hubble_Parameter_dev( a_half, H0, Omega_M, Omega_L, Omega_K ); + H = Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K); + H_half = Get_Hubble_Parameter_dev(a_half, H0, Omega_M, Omega_L, Omega_K); - dt = da / ( current_a * H ) * cosmo_h; - dt_half = da / ( a_half * H_half ) * cosmo_h / ( a_half ); + dt = da / (current_a * H) * cosmo_h; + dt_half = da / (a_half * H_half) * cosmo_h / (a_half); // if ( tid == 0 ) printf( "dt: %f\n", dt); // if ( tid == 0 ) printf( "pos_x: %f\n", pos_x_dev[tid]); @@ -210,24 +217,28 @@ __global__ void Advance_Particles_KDK_Step1_Cosmo_Kernel( part_int_t n_local, Re // if ( tid == 0 ) printf( "grav_x: %f\n", grav_x_dev[tid]); // Advance velocities by half a step - vel_x = ( current_a*vel_x + 0.5*dt*grav_x_dev[tid] ) / a_half; - vel_y = ( current_a*vel_y + 0.5*dt*grav_y_dev[tid] ) / a_half; - vel_z = ( current_a*vel_z + 0.5*dt*grav_z_dev[tid] ) / a_half; + vel_x = (current_a * 
vel_x + 0.5 * dt * grav_x_dev[tid]) / a_half; + vel_y = (current_a * vel_y + 0.5 * dt * grav_y_dev[tid]) / a_half; + vel_z = (current_a * vel_z + 0.5 * dt * grav_z_dev[tid]) / a_half; vel_x_dev[tid] = vel_x; vel_y_dev[tid] = vel_y; vel_z_dev[tid] = vel_z; - //Advance Positions using advanced velocities + // Advance Positions using advanced velocities pos_x_dev[tid] += dt_half * vel_x; pos_y_dev[tid] += dt_half * vel_y; pos_z_dev[tid] += dt_half * vel_z; } - -__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - - part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x ; - if ( tid >= n_local) return; +__global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel(part_int_t n_local, Real da, Real *vel_x_dev, Real *vel_y_dev, + Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, + Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, + Real Omega_M, Real Omega_L, Real Omega_K) +{ + part_int_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= n_local) { + return; + } Real vel_x, vel_y, vel_z; vel_x = vel_x_dev[tid]; @@ -235,23 +246,26 @@ __global__ void Advance_Particles_KDK_Step2_Cosmo_Kernel( part_int_t n_local, Re vel_z = vel_z_dev[tid]; Real da_half, a_half, dt; - da_half = da/2; - a_half = current_a - da_half; + da_half = da / 2; + a_half = current_a - da_half; - dt = da / ( current_a * Get_Hubble_Parameter_dev( current_a, H0, Omega_M, Omega_L, Omega_K ) ) * cosmo_h; + dt = da / (current_a * Get_Hubble_Parameter_dev(current_a, H0, Omega_M, Omega_L, Omega_K)) * cosmo_h; // Advance velocities by the second half a step - vel_x_dev[tid] = ( a_half*vel_x + 0.5*dt*grav_x_dev[tid] ) / current_a; - vel_y_dev[tid] = ( a_half*vel_y + 0.5*dt*grav_y_dev[tid] ) / current_a; - vel_z_dev[tid] = ( a_half*vel_z + 0.5*dt*grav_z_dev[tid] ) / current_a; - + 
vel_x_dev[tid] = (a_half * vel_x + 0.5 * dt * grav_x_dev[tid]) / current_a; + vel_y_dev[tid] = (a_half * vel_y + 0.5 * dt * grav_y_dev[tid]) / current_a; + vel_z_dev[tid] = (a_half * vel_z + 0.5 * dt * grav_z_dev[tid]) / current_a; } - -void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *pos_x_dev, Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - +void Particles3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *pos_x_dev, + Real *pos_y_dev, Real *pos_z_dev, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -259,19 +273,22 @@ void Particles_3D::Advance_Particles_KDK_Step1_Cosmo_GPU_function( part_int_t n_ // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K ); - CHECK(cudaDeviceSynchronize()); - // CudaCheckError(); + hipLaunchKernelGGL(Advance_Particles_KDK_Step1_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, + pos_x_dev, pos_y_dev, pos_z_dev, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, + grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K); + GPU_Error_Check(cudaDeviceSynchronize()); + // GPU_Error_Check(); } - } - - 
-void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_local, Real delta_a, Real *vel_x_dev, Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, Real *grav_y_dev, Real *grav_z_dev, Real current_a, Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, Real Omega_K ){ - +void Particles3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function(part_int_t n_local, Real delta_a, Real *vel_x_dev, + Real *vel_y_dev, Real *vel_z_dev, Real *grav_x_dev, + Real *grav_y_dev, Real *grav_z_dev, Real current_a, + Real H0, Real cosmo_h, Real Omega_M, Real Omega_L, + Real Omega_K) +{ // set values for GPU kernels - int ngrid = (n_local + TPB_PARTICLES - 1) / TPB_PARTICLES; + int ngrid = (n_local - 1) / TPB_PARTICLES + 1; // number of blocks per 1D grid dim3 dim1dGrid(ngrid, 1, 1); // number of threads per 1D block @@ -279,15 +296,14 @@ void Particles_3D::Advance_Particles_KDK_Step2_Cosmo_GPU_function( part_int_t n_ // Only runs if there are local particles if (n_local > 0) { - hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, Omega_M, Omega_L, Omega_K ); - CHECK(cudaDeviceSynchronize()); - // CudaCheckError(); + hipLaunchKernelGGL(Advance_Particles_KDK_Step2_Cosmo_Kernel, dim1dGrid, dim1dBlock, 0, 0, n_local, delta_a, + vel_x_dev, vel_y_dev, vel_z_dev, grav_x_dev, grav_y_dev, grav_z_dev, current_a, H0, cosmo_h, + Omega_M, Omega_L, Omega_K); + GPU_Error_Check(cudaDeviceSynchronize()); + // GPU_Error_Check(); } } -#endif //COSMOLOGY - - - + #endif // COSMOLOGY #endif diff --git a/src/particles/starburst99_snr.txt b/src/particles/starburst99_snr.txt new file mode 100755 index 000000000..449821025 --- /dev/null +++ b/src/particles/starburst99_snr.txt @@ -0,0 +1,1007 @@ + MODEL DESIGNATION: MW_center + MODEL GENERATED: Mon Nov 28 15:05:08 2022 + + RESULTS FOR THE SUPERNOVA RATE + + ALL SUPERNOVAE TYPE IB SUPERNOVAE ALL 
SUPERNOVAE STARS + SUPERNOVAE + TIME TOTAL RATE POWER ENERGY TOTAL RATE POWER ENERGY TYPICAL MASS LOWEST PROG. MASS POWER ENERGY + 0.100E+05 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.937 51.436 + 0.110E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.943 52.483 + 0.210E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.950 52.767 + 0.310E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.956 52.940 + 0.410E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.962 53.064 + 0.510E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.968 53.162 + 0.610E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.974 53.243 + 0.710E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.979 53.312 + 0.810E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.985 53.372 + 0.910E+06 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.990 53.426 + 0.101E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.996 53.474 + 0.111E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.001 53.518 + 0.121E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.005 53.558 + 0.131E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.010 53.595 + 0.141E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.013 53.629 + 0.151E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.015 53.662 + 0.161E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.016 53.691 + 0.171E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.016 53.719 + 0.181E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.013 53.746 + 0.191E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.007 53.770 + 0.201E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.997 53.792 + 0.211E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.983 53.813 + 0.221E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 
0.0 39.962 53.832 + 0.231E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.932 53.849 + 0.241E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.946 53.866 + 0.251E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 39.970 53.883 + 0.261E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.016 53.901 + 0.271E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.026 53.919 + 0.281E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.009 53.936 + 0.291E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.094 53.955 + 0.301E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.202 53.978 + 0.311E+07 -30.000 -30.000 -30.000 -30.000 -30.000 -30.000 0.0 0.0 40.265 54.004 + 0.321E+07 -4.388 39.113 51.612 -4.388 39.113 51.612 99.7 99.3 40.333 54.032 + 0.331E+07 -3.308 40.193 52.727 -3.308 40.193 52.727 96.2 91.5 40.545 54.075 + 0.341E+07 -3.317 40.184 53.007 -3.317 40.184 53.007 88.8 84.8 40.534 54.113 + 0.351E+07 -3.306 40.195 53.179 -3.306 40.195 53.179 82.5 79.0 40.509 54.145 + 0.361E+07 -3.296 40.205 53.304 -3.296 40.205 53.304 77.0 73.9 40.484 54.174 + 0.371E+07 -3.266 40.235 53.408 -3.266 40.235 53.408 72.1 69.5 40.482 54.201 + 0.381E+07 -3.277 40.223 53.489 -3.277 40.223 53.489 67.9 65.5 40.454 54.225 + 0.391E+07 -3.269 40.232 53.559 -3.269 40.232 53.559 64.1 61.9 40.402 54.245 + 0.401E+07 -3.266 40.235 53.620 -3.266 40.235 53.620 60.7 58.8 40.372 54.263 + 0.411E+07 -3.269 40.232 53.672 -3.269 40.232 53.672 57.7 56.0 40.349 54.280 + 0.421E+07 -3.265 40.236 53.720 -3.265 40.236 53.720 55.0 53.5 40.335 54.295 + 0.431E+07 -3.262 40.239 53.763 -3.262 40.239 53.763 52.5 51.2 40.333 54.310 + 0.441E+07 -3.258 40.243 53.803 -3.258 40.243 53.803 50.3 49.1 40.328 54.324 + 0.451E+07 -3.255 40.246 53.839 -3.255 40.246 53.839 48.3 47.1 40.324 54.337 + 0.461E+07 -3.251 40.249 53.873 -3.251 40.249 53.873 46.4 45.4 40.319 54.350 + 0.471E+07 -3.227 40.274 53.906 -3.495 40.006 53.891 44.7 43.7 40.332 
54.363 + 0.481E+07 -3.245 40.255 53.936 -30.000 -30.000 53.891 43.1 42.2 40.312 54.375 + 0.491E+07 -3.243 40.258 53.964 -30.000 -30.000 53.891 41.7 40.8 40.313 54.387 + 0.501E+07 -3.249 40.252 53.989 -30.000 -30.000 53.891 40.3 39.5 40.301 54.398 + 0.511E+07 -3.263 40.238 54.013 -30.000 -30.000 53.891 39.1 38.4 40.283 54.408 + 0.521E+07 -3.264 40.237 54.035 -30.000 -30.000 53.891 37.9 37.3 40.275 54.418 + 0.531E+07 -3.243 40.258 54.058 -30.000 -30.000 53.891 36.9 36.3 40.291 54.428 + 0.541E+07 -3.266 40.235 54.078 -30.000 -30.000 53.891 35.9 35.3 40.269 54.438 + 0.551E+07 -3.267 40.234 54.097 -3.693 39.808 53.902 35.0 34.4 40.266 54.447 + 0.561E+07 -3.267 40.234 54.115 -3.267 40.234 53.931 34.1 33.6 40.263 54.456 + 0.571E+07 -3.268 40.233 54.133 -3.268 40.233 53.957 33.3 32.8 40.258 54.464 + 0.581E+07 -3.269 40.232 54.150 -3.269 40.232 53.982 32.5 32.0 40.255 54.473 + 0.591E+07 -3.267 40.234 54.166 -3.267 40.234 54.006 31.7 31.3 40.254 54.481 + 0.601E+07 -3.267 40.234 54.182 -3.267 40.234 54.029 31.0 30.6 40.250 54.489 + 0.611E+07 -3.268 40.233 54.197 -3.268 40.233 54.050 30.4 30.0 40.250 54.497 + 0.621E+07 -3.268 40.233 54.212 -3.268 40.233 54.071 29.7 29.4 40.250 54.505 + 0.631E+07 -3.268 40.233 54.226 -3.268 40.233 54.090 29.1 28.8 40.250 54.512 + 0.641E+07 -3.269 40.232 54.240 -3.269 40.232 54.109 28.6 28.2 40.246 54.519 + 0.651E+07 -3.269 40.232 54.253 -3.269 40.232 54.126 28.0 27.7 40.245 54.527 + 0.661E+07 -3.269 40.232 54.266 -3.404 40.097 54.139 27.5 27.2 40.244 54.534 + 0.671E+07 -3.270 40.231 54.278 -30.000 -30.000 54.139 27.0 26.7 40.243 54.541 + 0.681E+07 -3.270 40.231 54.290 -30.000 -30.000 54.139 26.5 26.2 40.242 54.548 + 0.691E+07 -3.271 40.230 54.302 -30.000 -30.000 54.139 26.0 25.8 40.241 54.554 + 0.701E+07 -3.271 40.230 54.314 -30.000 -30.000 54.139 25.6 25.3 40.241 54.561 + 0.711E+07 -3.273 40.228 54.325 -30.000 -30.000 54.139 25.2 24.9 40.239 54.567 + 0.721E+07 -3.280 40.221 54.335 -30.000 -30.000 54.139 24.8 24.5 40.232 54.574 + 0.731E+07 
-3.282 40.219 54.346 -30.000 -30.000 54.139 24.4 24.1 40.231 54.580 + 0.741E+07 -3.283 40.218 54.356 -30.000 -30.000 54.139 24.0 23.8 40.229 54.586 + 0.751E+07 -3.285 40.216 54.366 -30.000 -30.000 54.139 23.6 23.4 40.228 54.592 + 0.761E+07 -3.286 40.215 54.375 -30.000 -30.000 54.139 23.3 23.1 40.227 54.598 + 0.771E+07 -3.287 40.214 54.385 -30.000 -30.000 54.139 23.0 22.8 40.225 54.604 + 0.781E+07 -3.288 40.212 54.394 -30.000 -30.000 54.139 22.6 22.5 40.224 54.609 + 0.791E+07 -3.290 40.211 54.403 -30.000 -30.000 54.139 22.3 22.2 40.223 54.615 + 0.801E+07 -3.291 40.210 54.411 -30.000 -30.000 54.139 22.0 21.9 40.221 54.620 + 0.811E+07 -3.292 40.208 54.420 -30.000 -30.000 54.139 21.7 21.6 40.219 54.626 + 0.821E+07 -3.294 40.207 54.428 -30.000 -30.000 54.139 21.5 21.3 40.216 54.631 + 0.831E+07 -3.295 40.206 54.436 -30.000 -30.000 54.139 21.2 21.0 40.214 54.636 + 0.841E+07 -3.296 40.204 54.444 -30.000 -30.000 54.139 20.9 20.8 40.212 54.641 + 0.851E+07 -3.298 40.203 54.452 -30.000 -30.000 54.139 20.7 20.5 40.210 54.646 + 0.861E+07 -3.299 40.202 54.460 -30.000 -30.000 54.139 20.4 20.3 40.208 54.651 + 0.871E+07 -3.300 40.201 54.467 -30.000 -30.000 54.139 20.2 20.0 40.207 54.656 + 0.881E+07 -3.328 40.173 54.474 -30.000 -30.000 54.139 20.0 19.8 40.179 54.661 + 0.891E+07 -3.340 40.161 54.481 -30.000 -30.000 54.139 19.8 19.6 40.167 54.665 + 0.901E+07 -3.344 40.157 54.487 -30.000 -30.000 54.139 19.6 19.4 40.162 54.669 + 0.911E+07 -3.348 40.153 54.493 -30.000 -30.000 54.139 19.4 19.2 40.158 54.674 + 0.921E+07 -3.352 40.149 54.500 -30.000 -30.000 54.139 19.2 19.1 40.154 54.678 + 0.931E+07 -3.356 40.145 54.506 -30.000 -30.000 54.139 19.0 18.9 40.149 54.682 + 0.941E+07 -3.360 40.141 54.511 -30.000 -30.000 54.139 18.8 18.7 40.145 54.686 + 0.951E+07 -3.363 40.137 54.517 -30.000 -30.000 54.139 18.6 18.5 40.141 54.690 + 0.961E+07 -3.367 40.134 54.523 -30.000 -30.000 54.139 18.5 18.4 40.137 54.694 + 0.971E+07 -3.371 40.130 54.528 -30.000 -30.000 54.139 18.3 18.2 40.134 54.697 + 0.981E+07 
-3.374 40.127 54.534 -30.000 -30.000 54.139 18.1 18.1 40.130 54.701 + 0.991E+07 -3.378 40.123 54.539 -30.000 -30.000 54.139 18.0 17.9 40.126 54.705 + 0.100E+08 -3.381 40.120 54.544 -30.000 -30.000 54.139 17.8 17.8 40.123 54.708 + 0.101E+08 -3.385 40.116 54.549 -30.000 -30.000 54.139 17.7 17.6 40.119 54.712 + 0.102E+08 -3.388 40.113 54.554 -30.000 -30.000 54.139 17.6 17.5 40.116 54.715 + 0.103E+08 -3.391 40.110 54.559 -30.000 -30.000 54.139 17.4 17.3 40.112 54.719 + 0.104E+08 -3.394 40.107 54.564 -30.000 -30.000 54.139 17.3 17.2 40.109 54.722 + 0.105E+08 -3.398 40.103 54.569 -30.000 -30.000 54.139 17.2 17.1 40.106 54.725 + 0.106E+08 -3.401 40.100 54.573 -30.000 -30.000 54.139 17.0 16.9 40.102 54.728 + 0.107E+08 -3.404 40.097 54.578 -30.000 -30.000 54.139 16.9 16.8 40.099 54.732 + 0.108E+08 -3.407 40.094 54.582 -30.000 -30.000 54.139 16.8 16.7 40.096 54.735 + 0.109E+08 -3.410 40.091 54.587 -30.000 -30.000 54.139 16.7 16.6 40.093 54.738 + 0.110E+08 -3.413 40.088 54.591 -30.000 -30.000 54.139 16.5 16.5 40.090 54.741 + 0.111E+08 -3.416 40.085 54.595 -30.000 -30.000 54.139 16.4 16.4 40.087 54.744 + 0.112E+08 -3.418 40.083 54.600 -30.000 -30.000 54.139 16.3 16.3 40.084 54.747 + 0.113E+08 -3.421 40.080 54.604 -30.000 -30.000 54.139 16.2 16.1 40.081 54.750 + 0.114E+08 -3.424 40.077 54.608 -30.000 -30.000 54.139 16.1 16.0 40.078 54.753 + 0.115E+08 -3.427 40.074 54.612 -30.000 -30.000 54.139 16.0 15.9 40.076 54.756 + 0.116E+08 -3.430 40.071 54.616 -30.000 -30.000 54.139 15.9 15.8 40.073 54.759 + 0.117E+08 -3.432 40.069 54.620 -30.000 -30.000 54.139 15.8 15.7 40.070 54.761 + 0.118E+08 -3.435 40.066 54.623 -30.000 -30.000 54.139 15.7 15.6 40.067 54.764 + 0.119E+08 -3.438 40.063 54.627 -30.000 -30.000 54.139 15.6 15.5 40.064 54.767 + 0.120E+08 -3.440 40.061 54.631 -30.000 -30.000 54.139 15.5 15.5 40.062 54.770 + 0.121E+08 -3.443 40.058 54.634 -30.000 -30.000 54.139 15.4 15.4 40.059 54.772 + 0.122E+08 -3.445 40.056 54.638 -30.000 -30.000 54.139 15.3 15.3 40.056 54.775 + 0.123E+08 
-3.448 40.053 54.642 -30.000 -30.000 54.139 15.2 15.2 40.054 54.777 + 0.124E+08 -3.450 40.051 54.645 -30.000 -30.000 54.139 15.1 15.1 40.051 54.780 + 0.125E+08 -3.453 40.048 54.649 -30.000 -30.000 54.139 15.1 15.0 40.049 54.783 + 0.126E+08 -3.454 40.047 54.652 -30.000 -30.000 54.139 15.0 14.9 40.048 54.785 + 0.127E+08 -3.456 40.045 54.655 -30.000 -30.000 54.139 14.9 14.8 40.045 54.788 + 0.128E+08 -3.458 40.042 54.659 -30.000 -30.000 54.139 14.8 14.8 40.043 54.790 + 0.129E+08 -3.461 40.040 54.662 -30.000 -30.000 54.139 14.7 14.7 40.041 54.792 + 0.130E+08 -3.463 40.038 54.665 -30.000 -30.000 54.139 14.7 14.6 40.038 54.795 + 0.131E+08 -3.465 40.035 54.668 -30.000 -30.000 54.139 14.6 14.5 40.036 54.797 + 0.132E+08 -3.468 40.033 54.672 -30.000 -30.000 54.139 14.5 14.5 40.034 54.800 + 0.133E+08 -3.470 40.031 54.675 -30.000 -30.000 54.139 14.4 14.4 40.032 54.802 + 0.134E+08 -3.472 40.029 54.678 -30.000 -30.000 54.139 14.4 14.3 40.029 54.804 + 0.135E+08 -3.474 40.027 54.681 -30.000 -30.000 54.139 14.3 14.2 40.027 54.807 + 0.136E+08 -3.477 40.024 54.684 -30.000 -30.000 54.139 14.2 14.2 40.025 54.809 + 0.137E+08 -3.479 40.022 54.687 -30.000 -30.000 54.139 14.1 14.1 40.023 54.811 + 0.138E+08 -3.481 40.020 54.690 -30.000 -30.000 54.139 14.1 14.0 40.021 54.813 + 0.139E+08 -3.483 40.018 54.693 -30.000 -30.000 54.139 14.0 14.0 40.019 54.815 + 0.140E+08 -3.485 40.016 54.696 -30.000 -30.000 54.139 13.9 13.9 40.016 54.818 + 0.141E+08 -3.487 40.014 54.698 -30.000 -30.000 54.139 13.9 13.8 40.014 54.820 + 0.142E+08 -3.489 40.012 54.701 -30.000 -30.000 54.139 13.8 13.8 40.012 54.822 + 0.143E+08 -3.491 40.010 54.704 -30.000 -30.000 54.139 13.7 13.7 40.010 54.824 + 0.144E+08 -3.493 40.008 54.707 -30.000 -30.000 54.139 13.7 13.7 40.008 54.826 + 0.145E+08 -3.495 40.006 54.709 -30.000 -30.000 54.139 13.6 13.6 40.006 54.828 + 0.146E+08 -3.497 40.004 54.712 -30.000 -30.000 54.139 13.6 13.5 40.004 54.830 + 0.147E+08 -3.499 40.002 54.715 -30.000 -30.000 54.139 13.5 13.5 40.002 54.832 + 0.148E+08 
-3.501 40.000 54.717 -30.000 -30.000 54.139 13.4 13.4 40.000 54.834 + 0.149E+08 -3.503 39.998 54.720 -30.000 -30.000 54.139 13.4 13.4 39.998 54.836 + 0.150E+08 -3.505 39.996 54.723 -30.000 -30.000 54.139 13.3 13.3 39.996 54.838 + 0.151E+08 -3.507 39.994 54.725 -30.000 -30.000 54.139 13.3 13.2 39.994 54.840 + 0.152E+08 -3.509 39.992 54.728 -30.000 -30.000 54.139 13.2 13.2 39.992 54.842 + 0.153E+08 -3.511 39.990 54.730 -30.000 -30.000 54.139 13.2 13.1 39.990 54.844 + 0.154E+08 -3.513 39.988 54.733 -30.000 -30.000 54.139 13.1 13.1 39.988 54.846 + 0.155E+08 -3.515 39.986 54.735 -30.000 -30.000 54.139 13.1 13.0 39.986 54.848 + 0.156E+08 -3.517 39.984 54.738 -30.000 -30.000 54.139 13.0 13.0 39.985 54.850 + 0.157E+08 -3.518 39.983 54.740 -30.000 -30.000 54.139 13.0 12.9 39.983 54.852 + 0.158E+08 -3.520 39.981 54.742 -30.000 -30.000 54.139 12.9 12.9 39.981 54.853 + 0.159E+08 -3.522 39.979 54.745 -30.000 -30.000 54.139 12.9 12.8 39.979 54.855 + 0.160E+08 -3.524 39.977 54.747 -30.000 -30.000 54.139 12.8 12.8 39.977 54.857 + 0.161E+08 -3.526 39.975 54.749 -30.000 -30.000 54.139 12.8 12.7 39.975 54.859 + 0.162E+08 -3.528 39.973 54.752 -30.000 -30.000 54.139 12.7 12.7 39.974 54.861 + 0.163E+08 -3.529 39.972 54.754 -30.000 -30.000 54.139 12.7 12.6 39.972 54.862 + 0.164E+08 -3.531 39.970 54.756 -30.000 -30.000 54.139 12.6 12.6 39.970 54.864 + 0.165E+08 -3.533 39.968 54.758 -30.000 -30.000 54.139 12.6 12.5 39.968 54.866 + 0.166E+08 -3.535 39.966 54.761 -30.000 -30.000 54.139 12.5 12.5 39.967 54.868 + 0.167E+08 -3.536 39.965 54.763 -30.000 -30.000 54.139 12.5 12.4 39.965 54.869 + 0.168E+08 -3.538 39.963 54.765 -30.000 -30.000 54.139 12.4 12.4 39.963 54.871 + 0.169E+08 -3.540 39.961 54.767 -30.000 -30.000 54.139 12.4 12.4 39.961 54.873 + 0.170E+08 -3.541 39.960 54.769 -30.000 -30.000 54.139 12.3 12.3 39.960 54.874 + 0.171E+08 -3.543 39.958 54.771 -30.000 -30.000 54.139 12.3 12.3 39.958 54.876 + 0.172E+08 -3.545 39.956 54.773 -30.000 -30.000 54.139 12.3 12.2 39.956 54.878 + 0.173E+08 
-3.546 39.955 54.775 -30.000 -30.000 54.139 12.2 12.2 39.955 54.879 + 0.174E+08 -3.548 39.953 54.778 -30.000 -30.000 54.139 12.2 12.1 39.953 54.881 + 0.175E+08 -3.550 39.951 54.780 -30.000 -30.000 54.139 12.1 12.1 39.951 54.883 + 0.176E+08 -3.551 39.950 54.782 -30.000 -30.000 54.139 12.1 12.1 39.950 54.884 + 0.177E+08 -3.553 39.948 54.784 -30.000 -30.000 54.139 12.0 12.0 39.948 54.886 + 0.178E+08 -3.554 39.947 54.786 -30.000 -30.000 54.139 12.0 12.0 39.947 54.887 + 0.179E+08 -3.554 39.947 54.788 -30.000 -30.000 54.139 12.0 11.9 39.947 54.889 + 0.180E+08 -3.556 39.945 54.790 -30.000 -30.000 54.139 11.9 11.9 39.945 54.890 + 0.181E+08 -3.557 39.944 54.791 -30.000 -30.000 54.139 11.9 11.9 39.944 54.892 + 0.182E+08 -3.559 39.942 54.793 -30.000 -30.000 54.139 11.8 11.8 39.942 54.894 + 0.183E+08 -3.560 39.941 54.795 -30.000 -30.000 54.139 11.8 11.8 39.941 54.895 + 0.184E+08 -3.562 39.939 54.797 -30.000 -30.000 54.139 11.8 11.8 39.939 54.897 + 0.185E+08 -3.563 39.938 54.799 -30.000 -30.000 54.139 11.7 11.7 39.938 54.898 + 0.186E+08 -3.565 39.936 54.801 -30.000 -30.000 54.139 11.7 11.7 39.936 54.900 + 0.187E+08 -3.566 39.935 54.803 -30.000 -30.000 54.139 11.7 11.6 39.935 54.901 + 0.188E+08 -3.568 39.933 54.805 -30.000 -30.000 54.139 11.6 11.6 39.933 54.903 + 0.189E+08 -3.569 39.932 54.807 -30.000 -30.000 54.139 11.6 11.6 39.932 54.904 + 0.190E+08 -3.571 39.930 54.808 -30.000 -30.000 54.139 11.6 11.5 39.931 54.905 + 0.191E+08 -3.572 39.929 54.810 -30.000 -30.000 54.139 11.5 11.5 39.929 54.907 + 0.192E+08 -3.573 39.928 54.812 -30.000 -30.000 54.139 11.5 11.5 39.928 54.908 + 0.193E+08 -3.575 39.926 54.814 -30.000 -30.000 54.139 11.5 11.4 39.926 54.910 + 0.194E+08 -3.576 39.924 54.816 -30.000 -30.000 54.139 11.4 11.4 39.925 54.911 + 0.195E+08 -3.578 39.923 54.817 -30.000 -30.000 54.139 11.4 11.4 39.923 54.913 + 0.196E+08 -3.579 39.922 54.819 -30.000 -30.000 54.139 11.4 11.3 39.922 54.914 + 0.197E+08 -3.581 39.920 54.821 -30.000 -30.000 54.139 11.3 11.3 39.920 54.915 + 0.198E+08 
-3.582 39.919 54.822 -30.000 -30.000 54.139 11.3 11.3 39.919 54.917 + 0.199E+08 -3.583 39.918 54.824 -30.000 -30.000 54.139 11.3 11.2 39.918 54.918 + 0.200E+08 -3.585 39.916 54.826 -30.000 -30.000 54.139 11.2 11.2 39.916 54.919 + 0.201E+08 -3.586 39.915 54.828 -30.000 -30.000 54.139 11.2 11.2 39.915 54.921 + 0.202E+08 -3.587 39.913 54.829 -30.000 -30.000 54.139 11.2 11.1 39.914 54.922 + 0.203E+08 -3.589 39.912 54.831 -30.000 -30.000 54.139 11.1 11.1 39.912 54.924 + 0.204E+08 -3.590 39.911 54.832 -30.000 -30.000 54.139 11.1 11.1 39.911 54.925 + 0.205E+08 -3.592 39.909 54.834 -30.000 -30.000 54.139 11.1 11.1 39.909 54.926 + 0.206E+08 -3.593 39.908 54.836 -30.000 -30.000 54.139 11.0 11.0 39.908 54.927 + 0.207E+08 -3.594 39.907 54.837 -30.000 -30.000 54.139 11.0 11.0 39.907 54.929 + 0.208E+08 -3.595 39.905 54.839 -30.000 -30.000 54.139 11.0 11.0 39.906 54.930 + 0.209E+08 -3.597 39.904 54.841 -30.000 -30.000 54.139 10.9 10.9 39.904 54.931 + 0.210E+08 -3.598 39.903 54.842 -30.000 -30.000 54.139 10.9 10.9 39.903 54.933 + 0.211E+08 -3.599 39.901 54.844 -30.000 -30.000 54.139 10.9 10.9 39.902 54.934 + 0.212E+08 -3.601 39.900 54.845 -30.000 -30.000 54.139 10.9 10.8 39.900 54.935 + 0.213E+08 -3.602 39.899 54.847 -30.000 -30.000 54.139 10.8 10.8 39.899 54.936 + 0.214E+08 -3.603 39.897 54.848 -30.000 -30.000 54.139 10.8 10.8 39.898 54.938 + 0.215E+08 -3.604 39.896 54.850 -30.000 -30.000 54.139 10.8 10.8 39.897 54.939 + 0.216E+08 -3.606 39.895 54.851 -30.000 -30.000 54.139 10.8 10.7 39.895 54.940 + 0.217E+08 -3.607 39.894 54.853 -30.000 -30.000 54.139 10.7 10.7 39.894 54.941 + 0.218E+08 -3.609 39.892 54.854 -30.000 -30.000 54.139 10.7 10.7 39.892 54.943 + 0.219E+08 -3.610 39.891 54.856 -30.000 -30.000 54.139 10.7 10.7 39.891 54.944 + 0.220E+08 -3.611 39.890 54.857 -30.000 -30.000 54.139 10.6 10.6 39.890 54.945 + 0.221E+08 -3.612 39.889 54.859 -30.000 -30.000 54.139 10.6 10.6 39.889 54.946 + 0.222E+08 -3.613 39.887 54.860 -30.000 -30.000 54.139 10.6 10.6 39.887 54.947 + 0.223E+08 
-3.615 39.886 54.862 -30.000 -30.000 54.139 10.6 10.6 39.886 54.949 + 0.224E+08 -3.616 39.885 54.863 -30.000 -30.000 54.139 10.5 10.5 39.885 54.950 + 0.225E+08 -3.617 39.884 54.865 -30.000 -30.000 54.139 10.5 10.5 39.884 54.951 + 0.226E+08 -3.618 39.882 54.866 -30.000 -30.000 54.139 10.5 10.5 39.883 54.952 + 0.227E+08 -3.620 39.881 54.867 -30.000 -30.000 54.139 10.5 10.4 39.881 54.953 + 0.228E+08 -3.621 39.880 54.869 -30.000 -30.000 54.139 10.4 10.4 39.880 54.954 + 0.229E+08 -3.622 39.879 54.870 -30.000 -30.000 54.139 10.4 10.4 39.879 54.956 + 0.230E+08 -3.623 39.878 54.872 -30.000 -30.000 54.139 10.4 10.4 39.878 54.957 + 0.231E+08 -3.624 39.876 54.873 -30.000 -30.000 54.139 10.4 10.4 39.876 54.958 + 0.232E+08 -3.626 39.875 54.874 -30.000 -30.000 54.139 10.3 10.3 39.875 54.959 + 0.233E+08 -3.627 39.874 54.876 -30.000 -30.000 54.139 10.3 10.3 39.874 54.960 + 0.234E+08 -3.628 39.873 54.877 -30.000 -30.000 54.139 10.3 10.3 39.873 54.961 + 0.235E+08 -3.629 39.872 54.878 -30.000 -30.000 54.139 10.3 10.3 39.872 54.962 + 0.236E+08 -3.630 39.871 54.880 -30.000 -30.000 54.139 10.2 10.2 39.871 54.964 + 0.237E+08 -3.632 39.869 54.881 -30.000 -30.000 54.139 10.2 10.2 39.869 54.965 + 0.238E+08 -3.632 39.868 54.882 -30.000 -30.000 54.139 10.2 10.2 39.868 54.966 + 0.239E+08 -3.634 39.867 54.884 -30.000 -30.000 54.139 10.2 10.2 39.867 54.967 + 0.240E+08 -3.635 39.866 54.885 -30.000 -30.000 54.139 10.2 10.1 39.866 54.968 + 0.241E+08 -3.636 39.865 54.886 -30.000 -30.000 54.139 10.1 10.1 39.865 54.969 + 0.242E+08 -3.637 39.863 54.888 -30.000 -30.000 54.139 10.1 10.1 39.863 54.970 + 0.243E+08 -3.638 39.863 54.889 -30.000 -30.000 54.139 10.1 10.1 39.863 54.971 + 0.244E+08 -3.640 39.861 54.890 -30.000 -30.000 54.139 10.1 10.1 39.861 54.972 + 0.245E+08 -3.641 39.860 54.892 -30.000 -30.000 54.139 10.0 10.0 39.860 54.973 + 0.246E+08 -3.642 39.859 54.893 -30.000 -30.000 54.139 10.0 10.0 39.859 54.974 + 0.247E+08 -3.643 39.858 54.894 -30.000 -30.000 54.139 10.0 10.0 39.858 54.975 + 0.248E+08 
-3.644 39.857 54.895 -30.000 -30.000 54.139 10.0 10.0 39.857 54.976 + 0.249E+08 -3.645 39.856 54.897 -30.000 -30.000 54.139 10.0 9.9 39.856 54.977 + 0.250E+08 -3.646 39.855 54.898 -30.000 -30.000 54.139 9.9 9.9 39.855 54.978 + 0.251E+08 -3.647 39.854 54.899 -30.000 -30.000 54.139 9.9 9.9 39.854 54.979 + 0.252E+08 -3.648 39.852 54.900 -30.000 -30.000 54.139 9.9 9.9 39.852 54.980 + 0.253E+08 -3.650 39.851 54.902 -30.000 -30.000 54.139 9.9 9.9 39.851 54.981 + 0.254E+08 -3.651 39.850 54.903 -30.000 -30.000 54.139 9.9 9.8 39.850 54.983 + 0.255E+08 -3.652 39.849 54.904 -30.000 -30.000 54.139 9.8 9.8 39.849 54.984 + 0.256E+08 -3.653 39.848 54.905 -30.000 -30.000 54.139 9.8 9.8 39.848 54.985 + 0.257E+08 -3.654 39.847 54.906 -30.000 -30.000 54.139 9.8 9.8 39.847 54.986 + 0.258E+08 -3.655 39.846 54.908 -30.000 -30.000 54.139 9.8 9.8 39.846 54.987 + 0.259E+08 -3.656 39.845 54.909 -30.000 -30.000 54.139 9.8 9.7 39.845 54.987 + 0.260E+08 -3.657 39.844 54.910 -30.000 -30.000 54.139 9.7 9.7 39.844 54.988 + 0.261E+08 -3.658 39.843 54.911 -30.000 -30.000 54.139 9.7 9.7 39.843 54.989 + 0.262E+08 -3.659 39.842 54.912 -30.000 -30.000 54.139 9.7 9.7 39.842 54.990 + 0.263E+08 -3.660 39.841 54.913 -30.000 -30.000 54.139 9.7 9.7 39.841 54.991 + 0.264E+08 -3.661 39.839 54.915 -30.000 -30.000 54.139 9.7 9.7 39.840 54.992 + 0.265E+08 -3.662 39.839 54.916 -30.000 -30.000 54.139 9.6 9.6 39.839 54.993 + 0.266E+08 -3.663 39.838 54.917 -30.000 -30.000 54.139 9.6 9.6 39.838 54.994 + 0.267E+08 -3.665 39.836 54.918 -30.000 -30.000 54.139 9.6 9.6 39.836 54.995 + 0.268E+08 -3.665 39.836 54.919 -30.000 -30.000 54.139 9.6 9.6 39.836 54.996 + 0.269E+08 -3.667 39.834 54.920 -30.000 -30.000 54.139 9.6 9.6 39.834 54.997 + 0.270E+08 -3.667 39.833 54.921 -30.000 -30.000 54.139 9.6 9.5 39.834 54.998 + 0.271E+08 -3.669 39.832 54.923 -30.000 -30.000 54.139 9.5 9.5 39.832 54.999 + 0.272E+08 -3.669 39.831 54.924 -30.000 -30.000 54.139 9.5 9.5 39.832 55.000 + 0.273E+08 -3.671 39.830 54.925 -30.000 -30.000 54.139 
9.5 9.5 39.830 55.001 + 0.274E+08 -3.671 39.829 54.926 -30.000 -30.000 54.139 9.5 9.5 39.829 55.002 + 0.275E+08 -3.673 39.828 54.927 -30.000 -30.000 54.139 9.5 9.5 39.828 55.003 + 0.276E+08 -3.674 39.827 54.928 -30.000 -30.000 54.139 9.4 9.4 39.827 55.004 + 0.277E+08 -3.674 39.827 54.929 -30.000 -30.000 54.139 9.4 9.4 39.827 55.005 + 0.278E+08 -3.676 39.825 54.930 -30.000 -30.000 54.139 9.4 9.4 39.825 55.005 + 0.279E+08 -3.677 39.824 54.931 -30.000 -30.000 54.139 9.4 9.4 39.824 55.006 + 0.280E+08 -3.677 39.823 54.932 -30.000 -30.000 54.139 9.4 9.4 39.824 55.007 + 0.281E+08 -3.679 39.822 54.933 -30.000 -30.000 54.139 9.4 9.3 39.822 55.008 + 0.282E+08 -3.679 39.821 54.934 -30.000 -30.000 54.139 9.3 9.3 39.821 55.009 + 0.283E+08 -3.681 39.820 54.935 -30.000 -30.000 54.139 9.3 9.3 39.820 55.010 + 0.284E+08 -3.681 39.820 54.937 -30.000 -30.000 54.139 9.3 9.3 39.820 55.011 + 0.285E+08 -3.683 39.818 54.938 -30.000 -30.000 54.139 9.3 9.3 39.818 55.012 + 0.286E+08 -3.684 39.817 54.939 -30.000 -30.000 54.139 9.3 9.3 39.817 55.013 + 0.287E+08 -3.684 39.817 54.940 -30.000 -30.000 54.139 9.3 9.3 39.817 55.013 + 0.288E+08 -3.685 39.815 54.941 -30.000 -30.000 54.139 9.2 9.2 39.815 55.014 + 0.289E+08 -3.686 39.815 54.942 -30.000 -30.000 54.139 9.2 9.2 39.815 55.015 + 0.290E+08 -3.688 39.813 54.943 -30.000 -30.000 54.139 9.2 9.2 39.813 55.016 + 0.291E+08 -3.688 39.813 54.944 -30.000 -30.000 54.139 9.2 9.2 39.813 55.017 + 0.292E+08 -3.689 39.812 54.945 -30.000 -30.000 54.139 9.2 9.2 39.812 55.018 + 0.293E+08 -3.690 39.811 54.946 -30.000 -30.000 54.139 9.2 9.2 39.811 55.019 + 0.294E+08 -3.691 39.810 54.947 -30.000 -30.000 54.139 9.1 9.1 39.810 55.019 + 0.295E+08 -3.692 39.809 54.948 -30.000 -30.000 54.139 9.1 9.1 39.809 55.020 + 0.296E+08 -3.693 39.808 54.949 -30.000 -30.000 54.139 9.1 9.1 39.808 55.021 + 0.297E+08 -3.694 39.807 54.950 -30.000 -30.000 54.139 9.1 9.1 39.807 55.022 + 0.298E+08 -3.695 39.806 54.951 -30.000 -30.000 54.139 9.1 9.1 39.806 55.023 + 0.299E+08 -3.696 39.805 
54.952 -30.000 -30.000 54.139 9.1 9.1 39.805 55.024 + 0.300E+08 -3.697 39.804 54.953 -30.000 -30.000 54.139 9.1 9.0 39.804 55.024 + 0.301E+08 -3.697 39.803 54.954 -30.000 -30.000 54.139 9.0 9.0 39.804 55.025 + 0.302E+08 -3.699 39.802 54.955 -30.000 -30.000 54.139 9.0 9.0 39.802 55.026 + 0.303E+08 -3.700 39.801 54.956 -30.000 -30.000 54.139 9.0 9.0 39.801 55.027 + 0.304E+08 -3.685 39.816 54.957 -30.000 -30.000 54.139 9.0 9.0 39.816 55.028 + 0.305E+08 -3.678 39.822 54.958 -30.000 -30.000 54.139 9.0 9.0 39.823 55.029 + 0.306E+08 -3.679 39.822 54.959 -30.000 -30.000 54.139 9.0 9.0 39.822 55.029 + 0.307E+08 -3.679 39.821 54.960 -30.000 -30.000 54.139 9.0 8.9 39.821 55.030 + 0.308E+08 -3.680 39.821 54.961 -30.000 -30.000 54.139 8.9 8.9 39.821 55.031 + 0.309E+08 -3.681 39.820 54.962 -30.000 -30.000 54.139 8.9 8.9 39.820 55.032 + 0.310E+08 -3.681 39.819 54.963 -30.000 -30.000 54.139 8.9 8.9 39.819 55.033 + 0.311E+08 -3.682 39.819 54.964 -30.000 -30.000 54.139 8.9 8.9 39.819 55.034 + 0.312E+08 -3.683 39.818 54.964 -30.000 -30.000 54.139 8.9 8.9 39.818 55.034 + 0.313E+08 -3.683 39.818 54.965 -30.000 -30.000 54.139 8.9 8.9 39.818 55.035 + 0.314E+08 -3.684 39.817 54.966 -30.000 -30.000 54.139 8.8 8.8 39.817 55.036 + 0.315E+08 -3.685 39.816 54.967 -30.000 -30.000 54.139 8.8 8.8 39.816 55.037 + 0.316E+08 -3.685 39.816 54.968 -30.000 -30.000 54.139 8.8 8.8 39.816 55.038 + 0.317E+08 -3.686 39.815 54.969 -30.000 -30.000 54.139 8.8 8.8 39.815 55.039 + 0.318E+08 -3.686 39.814 54.970 -30.000 -30.000 54.139 8.8 8.8 39.814 55.039 + 0.319E+08 -3.687 39.814 54.971 -30.000 -30.000 54.139 8.8 8.8 39.814 55.040 + 0.320E+08 -3.688 39.813 54.972 -30.000 -30.000 54.139 8.8 8.8 39.813 55.041 + 0.321E+08 -3.688 39.813 54.973 -30.000 -30.000 54.139 8.7 8.7 39.813 55.042 + 0.322E+08 -3.689 39.812 54.974 -30.000 -30.000 54.139 8.7 8.7 39.812 55.043 + 0.323E+08 -3.690 39.811 54.975 -30.000 -30.000 54.139 8.7 8.7 39.811 55.043 + 0.324E+08 -3.690 39.811 54.976 -30.000 -30.000 54.139 8.7 8.7 39.811 
55.044 + 0.325E+08 -3.691 39.810 54.977 -30.000 -30.000 54.139 8.7 8.7 39.810 55.045 + 0.326E+08 -3.691 39.810 54.978 -30.000 -30.000 54.139 8.7 8.7 39.810 55.046 + 0.327E+08 -3.692 39.809 54.979 -30.000 -30.000 54.139 8.7 8.7 39.809 55.047 + 0.328E+08 -3.692 39.809 54.980 -30.000 -30.000 54.139 8.7 8.6 39.809 55.047 + 0.329E+08 -3.693 39.808 54.981 -30.000 -30.000 54.139 8.6 8.6 39.808 55.048 + 0.330E+08 -3.693 39.808 54.982 -30.000 -30.000 54.139 8.6 8.6 39.808 55.049 + 0.331E+08 -3.694 39.807 54.982 -30.000 -30.000 54.139 8.6 8.6 39.807 55.050 + 0.332E+08 -3.694 39.807 54.983 -30.000 -30.000 54.139 8.6 8.6 39.807 55.051 + 0.333E+08 -3.695 39.806 54.984 -30.000 -30.000 54.139 8.6 8.6 39.806 55.051 + 0.334E+08 -3.696 39.805 54.985 -30.000 -30.000 54.139 8.6 8.6 39.805 55.052 + 0.335E+08 -3.696 39.805 54.986 -30.000 -30.000 54.139 8.6 8.5 39.805 55.053 + 0.336E+08 -3.697 39.804 54.987 -30.000 -30.000 54.139 8.5 8.5 39.804 55.054 + 0.337E+08 -3.697 39.803 54.988 -30.000 -30.000 54.139 8.5 8.5 39.804 55.054 + 0.338E+08 -3.698 39.803 54.989 -30.000 -30.000 54.139 8.5 8.5 39.803 55.055 + 0.339E+08 -3.699 39.802 54.990 -30.000 -30.000 54.139 8.5 8.5 39.802 55.056 + 0.340E+08 -3.699 39.802 54.991 -30.000 -30.000 54.139 8.5 8.5 39.802 55.057 + 0.341E+08 -3.700 39.801 54.991 -30.000 -30.000 54.139 8.5 8.5 39.801 55.058 + 0.342E+08 -3.701 39.800 54.992 -30.000 -30.000 54.139 8.5 8.5 39.800 55.058 + 0.343E+08 -3.701 39.800 54.993 -30.000 -30.000 54.139 8.5 8.4 39.800 55.059 + 0.344E+08 -3.702 39.799 54.994 -30.000 -30.000 54.139 8.4 8.4 39.799 55.060 + 0.345E+08 -3.702 39.799 54.995 -30.000 -30.000 54.139 8.4 8.4 39.799 55.061 + 0.346E+08 -3.703 39.798 54.996 -30.000 -30.000 54.139 8.4 8.4 39.798 55.061 + 0.347E+08 -3.703 39.797 54.997 -30.000 -30.000 54.139 8.4 8.4 39.798 55.062 + 0.348E+08 -3.703 39.798 54.998 -30.000 -30.000 54.139 8.4 8.4 39.798 55.063 + 0.349E+08 -3.705 39.796 54.998 -30.000 -30.000 54.139 8.4 8.4 39.796 55.064 + 0.350E+08 -3.705 39.796 54.999 -30.000 
-30.000 54.139 8.4 8.4 39.796 55.064 + 0.351E+08 -3.706 39.795 55.000 -30.000 -30.000 54.139 8.4 8.3 39.795 55.065 + 0.352E+08 -3.706 39.795 55.001 -30.000 -30.000 54.139 8.3 8.3 39.795 55.066 + 0.353E+08 -3.707 39.794 55.002 -30.000 -30.000 54.139 8.3 8.3 39.794 55.066 + 0.354E+08 -3.707 39.794 55.003 -30.000 -30.000 54.139 8.3 8.3 39.794 55.067 + 0.355E+08 -3.708 39.793 55.004 -30.000 -30.000 54.139 8.3 8.3 39.793 55.068 + 0.356E+08 -3.708 39.793 55.004 -30.000 -30.000 54.139 8.3 8.3 39.793 55.069 + 0.357E+08 -3.709 39.792 55.005 -30.000 -30.000 54.139 8.3 8.3 39.792 55.069 + 0.358E+08 -3.709 39.792 55.006 -30.000 -30.000 54.139 8.3 8.3 39.792 55.070 + 0.359E+08 -3.710 39.791 55.007 -30.000 -30.000 54.139 8.3 8.3 39.791 55.071 + 0.360E+08 -3.710 39.791 55.008 -30.000 -30.000 54.139 8.2 8.2 39.791 55.072 + 0.361E+08 -3.711 39.790 55.009 -30.000 -30.000 54.139 8.2 8.2 39.790 55.072 + 0.362E+08 -3.712 39.789 55.009 -30.000 -30.000 54.139 8.2 8.2 39.789 55.073 + 0.363E+08 -3.712 39.789 55.010 -30.000 -30.000 54.139 8.2 8.2 39.789 55.074 + 0.364E+08 -3.713 39.788 55.011 -30.000 -30.000 54.139 8.2 8.2 39.788 55.074 + 0.365E+08 -3.713 39.788 55.012 -30.000 -30.000 54.139 8.2 8.2 39.788 55.075 + 0.366E+08 -3.713 39.788 55.013 -30.000 -30.000 54.139 8.2 8.2 39.788 55.076 + 0.367E+08 -3.714 39.787 55.013 -30.000 -30.000 54.139 8.2 8.2 39.787 55.076 + 0.368E+08 -3.715 39.786 55.014 -30.000 -30.000 54.139 8.2 8.1 39.786 55.077 + 0.369E+08 -3.715 39.786 55.015 -30.000 -30.000 54.139 8.1 8.1 39.786 55.078 + 0.370E+08 -3.716 39.785 55.016 -30.000 -30.000 54.139 8.1 8.1 39.785 55.079 + 0.371E+08 -3.716 39.785 55.017 -30.000 -30.000 54.139 8.1 8.1 39.785 55.079 + 0.372E+08 -3.717 39.784 55.017 -30.000 -30.000 54.139 8.1 8.1 39.784 55.080 + 0.373E+08 -3.717 39.784 55.018 -30.000 -30.000 54.139 8.1 8.1 39.784 55.081 + 0.374E+08 -3.718 39.783 55.019 -30.000 -30.000 54.139 8.1 8.1 39.783 55.081 + 0.375E+08 -3.718 39.783 55.020 -30.000 -30.000 54.139 8.1 8.1 39.783 55.082 + 0.376E+08 
-3.719 39.782 55.021 -30.000 -30.000 54.139 8.1 8.1 39.782 55.083 + 0.377E+08 -3.719 39.781 55.021 -30.000 -30.000 54.139 8.1 8.0 39.781 55.083 + 0.378E+08 -3.720 39.781 55.022 -30.000 -30.000 54.139 8.0 8.0 39.781 55.084 + 0.379E+08 -3.720 39.781 55.023 -30.000 -30.000 54.139 8.0 8.0 39.781 55.085 + 0.380E+08 -3.721 39.780 55.024 -30.000 -30.000 54.139 8.0 8.0 39.780 55.085 + 0.381E+08 -3.722 39.779 55.025 -30.000 -30.000 54.139 8.0 8.0 39.779 55.086 + 0.382E+08 -3.922 39.579 55.025 -30.000 -30.000 54.139 8.0 0.0 39.579 55.087 + 0.383E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.745 55.087 + 0.384E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.746 55.087 + 0.385E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.744 55.087 + 0.386E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.736 55.087 + 0.387E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.738 55.087 + 0.388E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.741 55.087 + 0.389E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.739 55.087 + 0.390E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.740 55.087 + 0.391E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.739 55.087 + 0.392E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.731 55.087 + 0.393E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.727 55.087 + 0.394E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.731 55.087 + 0.395E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.728 55.087 + 0.396E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.731 55.087 + 0.397E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.723 55.087 + 0.398E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.724 55.087 + 0.399E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.724 55.087 + 0.400E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.719 55.087 + 0.401E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.719 55.087 + 0.402E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.716 55.087 + 0.403E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.717 55.087 + 0.404E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.716 55.087 + 0.405E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.717 55.087 + 0.406E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.716 55.087 + 0.407E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.705 55.087 + 0.408E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.703 55.087 + 0.409E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.712 55.087 + 0.410E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.710 55.087 + 0.411E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.702 55.087 + 0.412E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.702 55.087 + 0.413E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.697 55.087 + 0.414E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.697 55.087 + 0.415E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.701 55.087 + 0.416E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.696 55.087 + 0.417E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.696 55.087 + 0.418E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.693 55.087 + 0.419E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.693 55.087 + 0.420E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.692 55.087 + 0.421E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.687 55.087 + 0.422E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.684 55.087 + 0.423E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.685 55.087 + 0.424E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.683 55.087 + 0.425E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.685 55.087 + 0.426E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.682 55.087 + 0.427E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.682 55.087 + 0.428E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.679 55.087 + 0.429E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.679 55.087 + 0.430E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.679 55.087 + 0.431E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.680 55.087 + 0.432E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.675 55.087 + 0.433E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.675 55.087 + 0.434E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.669 55.087 + 0.435E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.673 55.087 + 0.436E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.669 55.087 + 0.437E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.661 55.087 + 0.438E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.666 55.087 + 0.439E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.670 55.087 + 0.440E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.668 55.087 + 0.441E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.661 55.087 + 0.442E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.663 55.087 + 0.443E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.668 55.087 + 0.444E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.667 55.087 + 0.445E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.659 55.087 + 0.446E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.657 55.087 + 0.447E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.651 55.087 + 0.448E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.668 55.087 + 0.449E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.666 55.087 + 0.450E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.663 55.087 + 0.451E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.656 55.087 + 0.452E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.646 55.087 + 0.453E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.650 55.087 + 0.454E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.647 55.087 + 0.455E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.658 55.087 + 0.456E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.658 55.087 + 0.457E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.645 55.087 + 0.458E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.645 55.087 + 0.459E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.648 55.087 + 0.460E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.652 55.087 + 0.461E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.652 55.087 + 0.462E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.646 55.087 + 0.463E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.653 55.087 + 0.464E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.643 55.087 + 0.465E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.649 55.087 + 0.466E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.649 55.087 + 0.467E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.650 55.087 + 0.468E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.650 55.087 + 0.469E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.639 55.087 + 0.470E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.643 55.087 + 0.471E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.645 55.087 + 0.472E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.644 55.087 + 0.473E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.643 55.087 + 0.474E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.631 55.087 + 0.475E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.625 55.087 + 0.476E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.644 55.087 + 0.477E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.639 55.087 + 0.478E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.626 55.087 + 0.479E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.633 55.087 + 0.480E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.635 55.087 + 0.481E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.627 55.087 + 0.482E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.628 55.087 + 0.483E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.614 55.087 + 0.484E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.618 55.087 + 0.485E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.606 55.087 + 0.486E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.606 55.087 + 0.487E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.606 55.087 + 0.488E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.605 55.087 + 0.489E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.596 55.087 + 0.490E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.597 55.087 + 0.491E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.598 55.087 + 0.492E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.578 55.087 + 0.493E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.575 55.087 + 0.494E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.576 55.087 + 0.495E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.587 55.087 + 0.496E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.574 55.087 + 0.497E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.573 55.087 + 0.498E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.572 55.087 + 0.499E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.571 55.087 + 0.500E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.562 55.087 + 0.501E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.561 55.087 + 0.502E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.555 55.087 + 0.503E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.554 55.087 + 0.504E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.562 55.087 + 0.505E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.549 55.087 + 0.506E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.547 55.087 + 0.507E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.547 55.087 + 0.508E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.546 55.087 + 0.509E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.534 55.087 + 0.510E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.528 55.087 + 0.511E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.543 55.087 + 0.512E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.526 55.087 + 0.513E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.528 55.087 + 0.514E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.527 55.087 + 0.515E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.526 55.087 + 0.516E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.522 55.087 + 0.517E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.509 55.087 + 0.518E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.507 55.087 + 0.519E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.506 55.087 + 0.520E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.501 55.087 + 0.521E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.500 55.087 + 0.522E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.494 55.087 + 0.523E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.484 55.087 + 0.524E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.492 55.087 + 0.525E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.496 55.087 + 0.526E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.481 55.087 + 0.527E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.466 55.087 + 0.528E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.480 55.087 + 0.529E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.478 55.087 + 0.530E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.478 55.087 + 0.531E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.459 55.087 + 0.532E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.462 55.087 + 0.533E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.475 55.087 + 0.534E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.453 55.087 + 0.535E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.456 55.087 + 0.536E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.451 55.087 + 0.537E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.453 55.087 + 0.538E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.453 55.087 + 0.539E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.452 55.087 + 0.540E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.444 55.087 + 0.541E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.438 55.087 + 0.542E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.434 55.087 + 0.543E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.433 55.087 + 0.544E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.432 55.087 + 0.545E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.431 55.087 + 0.546E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.430 55.087 + 0.547E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.429 55.087 + 0.548E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.409 55.087 + 0.549E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.408 55.087 + 0.550E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.417 55.087 + 0.551E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.406 55.087 + 0.552E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.405 55.087 + 0.553E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.405 55.087 + 0.554E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.404 55.087 + 0.555E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.403 55.087 + 0.556E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.398 55.087 + 0.557E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.401 55.087 + 0.558E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.396 55.087 + 0.559E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.400 55.087 + 0.560E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.388 55.087 + 0.561E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.381 55.087 + 0.562E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.376 55.087 + 0.563E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.375 55.087 + 0.564E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.371 55.087 + 0.565E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.357 55.087 + 0.566E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.356 55.087 + 0.567E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.352 55.087 + 0.568E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.354 55.087 + 0.569E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.354 55.087 + 0.570E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.345 55.087 + 0.571E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.344 55.087 + 0.572E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.343 55.087 + 0.573E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.350 55.087 + 0.574E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.326 55.087 + 0.575E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.345 55.087 + 0.576E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.347 55.087 + 0.577E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.347 55.087 + 0.578E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.328 55.087 + 0.579E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.327 55.087 + 0.580E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.321 55.087 + 0.581E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.326 55.087 + 0.582E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.328 55.087 + 0.583E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.324 55.087 + 0.584E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.315 55.087 + 0.585E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.322 55.087 + 0.586E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.304 55.087 + 0.587E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.303 55.087 + 0.588E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.288 55.087 + 0.589E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.280 55.087 + 0.590E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.276 55.087 + 0.591E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.275 55.087 + 0.592E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.274 55.087 + 0.593E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.273 55.087 + 0.594E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.272 55.087 + 0.595E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.273 55.087 + 0.596E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.272 55.087 + 0.597E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.272 55.087 + 0.598E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.250 55.087 + 0.599E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.267 55.087 + 0.600E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.248 55.087 + 0.601E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.248 55.087 + 0.602E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.247 55.087 + 0.603E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.246 55.087 + 0.604E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.245 55.087 + 0.605E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.244 55.087 + 0.606E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.236 55.087 + 0.607E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.215 55.087 + 0.608E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.198 55.087 + 0.609E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.198 55.087 + 0.610E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.213 55.087 + 0.611E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.212 55.087 + 0.612E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.195 55.087 + 0.613E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.194 55.087 + 0.614E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.188 55.087 + 0.615E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.187 55.087 + 0.616E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.183 55.087 + 0.617E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.182 55.087 + 0.618E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.173 55.087 + 0.619E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.172 55.087 + 0.620E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.166 55.087 + 0.621E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.168 55.087 + 0.622E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.169 55.087 + 0.623E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.167 55.087 + 0.624E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.125 55.087 + 0.625E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.162 55.087 + 0.626E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.151 55.087 + 0.627E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.126 55.087 + 0.628E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.118 55.087 + 0.629E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.080 55.087 + 0.630E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.079 55.087 + 0.631E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.078 55.087 + 0.632E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.078 55.087 + 0.633E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.077 55.087 + 0.634E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.076 55.087 + 0.635E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.101 55.087 + 0.636E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.075 55.087 + 0.637E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.074 55.087 + 0.638E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.074 55.087 + 0.639E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.094 55.087 + 0.640E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.040 55.087 + 0.641E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.039 55.087 + 0.642E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.064 55.087 + 0.643E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.037 55.087 + 0.644E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.062 55.087 + 0.645E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.024 55.087 + 0.646E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.023 55.087 + 0.647E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.022 55.087 + 0.648E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.034 55.087 + 0.649E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.988 55.087 + 0.650E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.036 55.087 + 0.651E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.987 55.087 + 0.652E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.986 55.087 + 0.653E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 35.003 55.087 + 0.654E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.985 55.087 + 0.655E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.984 55.087 + 0.656E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.983 55.087 + 0.657E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.982 55.087 + 0.658E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.982 55.087 + 0.659E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.981 55.087 + 0.660E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.997 55.087 + 0.661E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.951 55.087 + 0.662E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.895 55.087 + 0.663E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.894 55.087 + 0.664E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.962 55.087 + 0.665E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.849 55.087 + 0.666E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.930 55.087 + 0.667E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.848 55.087 + 0.668E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.890 55.087 + 0.669E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.886 55.087 + 0.670E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.889 55.087 + 0.671E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.888 55.087 + 0.672E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.888 55.087 + 0.673E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.844 55.087 + 0.674E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.796 55.087 + 0.675E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.842 55.087 + 0.676E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.842 55.087 + 0.677E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.791 55.087 + 0.678E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.793 55.087 + 0.679E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.789 55.087 + 0.680E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.839 55.087 + 0.681E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.791 55.087 + 0.682E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.726 55.087 + 0.683E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.725 55.087 + 0.684E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.786 55.087 + 0.685E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.724 55.087 + 0.686E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.723 55.087 + 0.687E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.722 55.087 + 0.688E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.722 55.087 + 0.689E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.640 55.087 + 0.690E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.639 55.087 + 0.691E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.639 55.087 + 0.692E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.642 55.087 + 0.693E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.637 55.087 + 0.694E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.534 55.087 + 0.695E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.614 55.087 + 0.696E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.613 55.087 + 0.697E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.638 55.087 + 0.698E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.634 55.087 + 0.699E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.633 55.087 + 0.700E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.633 55.087 + 0.701E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.533 55.087 + 0.702E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.631 55.087 + 0.703E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.627 55.087 + 0.704E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.527 55.087 + 0.705E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.362 55.087 + 0.706E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.523 55.087 + 0.707E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.427 55.087 + 0.708E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.360 55.087 + 0.709E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.425 55.087 + 0.710E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.263 55.087 + 0.711E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.150 55.087 + 0.712E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 34.059 55.087 + 0.713E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.412 55.087 + 0.714E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.411 55.087 + 0.715E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.410 55.087 + 0.716E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.408 55.087 + 0.717E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.407 55.087 + 0.718E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.406 55.087 + 0.719E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.405 55.087 + 0.720E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.404 55.087 + 0.721E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.402 55.087 + 0.722E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.401 55.087 + 0.723E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.401 55.087 + 0.724E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.399 55.087 + 0.725E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.398 55.087 + 0.726E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.397 55.087 + 0.727E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.395 55.087 + 0.728E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.394 55.087 + 0.729E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.393 55.087 + 0.730E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.392 55.087 + 0.731E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.391 55.087 + 0.732E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.390 55.087 + 0.733E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.389 55.087 + 0.734E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.388 55.087 + 0.735E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.387 55.087 + 0.736E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.386 55.087 + 0.737E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.384 55.087 + 0.738E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.383 55.087 + 0.739E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.382 55.087 + 0.740E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.381 55.087 + 0.741E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.379 55.087 + 0.742E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.379 55.087 + 0.743E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.377 55.087 + 0.744E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.377 55.087 + 0.745E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.375 55.087 + 0.746E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.375 55.087 + 0.747E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.373 55.087 + 0.748E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.372 55.087 + 0.749E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.370 55.087 + 0.750E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.370 55.087 + 0.751E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.369 55.087 + 0.752E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.368 55.087 + 0.753E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.367 55.087 + 0.754E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.365 55.087 + 0.755E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.364 55.087 + 0.756E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.363 55.087 + 0.757E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.362 55.087 + 0.758E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.361 55.087 + 0.759E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.360 55.087 + 0.760E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.359 55.087 + 0.761E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.358 55.087 + 0.762E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.357 55.087 + 0.763E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.356 55.087 + 0.764E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.354 55.087 + 0.765E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.354 55.087 + 0.766E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.352 55.087 + 0.767E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.351 55.087 + 0.768E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.350 55.087 + 0.769E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.349 55.087 + 0.770E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.348 55.087 + 0.771E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.347 55.087 + 0.772E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.346 55.087 + 0.773E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.345 55.087 + 0.774E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.344 55.087 + 0.775E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.343 55.087 + 0.776E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.342 55.087 + 0.777E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.341 55.087 + 0.778E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.340 55.087 + 0.779E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.339 55.087 + 0.780E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.337 55.087 + 0.781E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.337 55.087 + 0.782E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.336 55.087 + 0.783E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.335 55.087 + 0.784E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.334 55.087 + 0.785E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.333 55.087 + 0.786E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.331 55.087 + 0.787E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.330 55.087 + 0.788E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.329 55.087 + 0.789E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.329 55.087 + 0.790E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.327 55.087 + 0.791E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.326 55.087 + 0.792E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.325 55.087 + 0.793E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.324 55.087 + 0.794E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.323 55.087 + 0.795E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.322 55.087 + 0.796E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.321 55.087 + 0.797E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.320 55.087 + 0.798E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.319 55.087 + 0.799E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.318 55.087 + 0.800E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.317 55.087 + 0.801E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.316 55.087 + 0.802E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.315 55.087 + 0.803E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.314 55.087 + 0.804E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.313 55.087 + 0.805E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.312 55.087 + 0.806E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.311 55.087 + 0.807E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.310 55.087 + 0.808E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.309 55.087 + 0.809E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.308 55.087 + 0.810E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.307 55.087 + 0.811E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.306 55.087 + 0.812E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.306 55.087 + 0.813E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.304 55.087 + 0.814E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.303 55.087 + 0.815E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.302 55.087 + 0.816E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.301 55.087 + 0.817E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.300 55.087 + 0.818E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.300 55.087 + 0.819E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.299 55.087 + 0.820E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.297 55.087 + 0.821E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.296 55.087 + 0.822E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.296 55.087 + 0.823E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.294 55.087 + 0.824E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.294 55.087 + 0.825E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.292 55.087 + 0.826E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.292 55.087 + 0.827E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.291 55.087 + 0.828E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.290 55.087 + 0.829E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.289 55.087 + 0.830E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.288 55.087 + 0.831E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.287 55.087 + 0.832E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.286 55.087 + 0.833E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.285 55.087 + 0.834E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.284 55.087 + 0.835E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.283 55.087 + 0.836E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.282 55.087 + 0.837E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.282 55.087 + 0.838E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.280 55.087 + 0.839E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.279 55.087 + 0.840E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.278 55.087 + 0.841E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.277 55.087 + 0.842E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.276 55.087 + 0.843E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.275 55.087 + 0.844E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.274 55.087 + 0.845E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.274 55.087 + 0.846E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.273 55.087 + 0.847E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.272 55.087 + 0.848E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.270 55.087 + 0.849E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.270 55.087 + 0.850E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.269 55.087 + 0.851E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.268 55.087 + 0.852E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.267 55.087 + 0.853E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.266 55.087 + 0.854E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.266 55.087 + 0.855E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.264 55.087 + 0.856E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.264 55.087 + 0.857E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.263 55.087 + 0.858E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.262 55.087 + 0.859E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.260 55.087 + 0.860E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.260 55.087 + 0.861E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.259 55.087 + 0.862E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.258 55.087 + 0.863E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.257 55.087 + 0.864E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.256 55.087 + 0.865E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.255 55.087 + 0.866E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.254 55.087 + 0.867E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.253 55.087 + 0.868E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.253 55.087 + 0.869E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.251 55.087 + 0.870E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.251 55.087 + 0.871E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.250 55.087 + 0.872E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.249 55.087 + 0.873E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.248 55.087 + 0.874E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.247 55.087 + 0.875E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.246 55.087 + 0.876E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.245 55.087 + 0.877E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.244 55.087 + 0.878E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.244 55.087 + 0.879E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.243 55.087 + 0.880E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.242 55.087 + 0.881E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.241 55.087 + 0.882E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.240 55.087 + 0.883E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.239 55.087 + 0.884E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.238 55.087 + 0.885E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.237 55.087 + 0.886E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.237 55.087 + 0.887E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.236 55.087 + 0.888E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.235 55.087 + 0.889E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.234 55.087 + 0.890E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.233 55.087 + 0.891E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.232 55.087 + 0.892E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.232 55.087 + 0.893E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.231 55.087 + 0.894E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.230 55.087 + 0.895E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.229 55.087 + 0.896E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.228 55.087 + 0.897E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.227 55.087 + 0.898E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.226 55.087 + 0.899E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.225 55.087 + 0.900E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.225 55.087 + 0.901E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.224 55.087 + 0.902E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.223 55.087 + 0.903E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.222 55.087 + 0.904E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.221 55.087 + 0.905E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.221 55.087 + 0.906E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.220 55.087 + 0.907E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.219 55.087 + 0.908E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.218 55.087 + 0.909E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.218 55.087 + 0.910E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.217 55.087 + 0.911E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.216 55.087 + 0.912E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.216 55.087 + 0.913E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.215 55.087 + 0.914E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.215 55.087 + 0.915E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.214 55.087 + 0.916E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.213 55.087 + 0.917E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.212 55.087 + 0.918E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.212 55.087 + 0.919E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.211 55.087 + 0.920E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.210 55.087 + 0.921E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.210 55.087 + 0.922E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.209 55.087 + 0.923E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.208 55.087 + 0.924E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.208 55.087 + 0.925E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.207 55.087 + 0.926E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.206 55.087 + 0.927E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.206 55.087 + 0.928E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.205 55.087 + 0.929E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.204 55.087 + 0.930E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.204 55.087 + 0.931E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.203 55.087 + 0.932E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.202 55.087 + 0.933E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.202 55.087 + 0.934E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.201 55.087 + 0.935E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.201 55.087 + 0.936E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.200 55.087 + 0.937E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.199 55.087 + 0.938E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.198 55.087 + 0.939E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.198 55.087 + 0.940E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.197 55.087 + 0.941E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.197 55.087 + 0.942E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.196 55.087 + 0.943E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.195 55.087 + 0.944E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.195 55.087 + 0.945E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.194 55.087 + 0.946E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.193 55.087 + 0.947E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.193 55.087 + 0.948E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.192 55.087 + 0.949E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.191 55.087 + 0.950E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.191 55.087 + 0.951E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.190 55.087 + 0.952E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.189 55.087 + 0.953E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.189 55.087 + 0.954E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.188 55.087 + 0.955E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.187 55.087 + 0.956E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.187 55.087 + 0.957E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.186 55.087 + 0.958E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.185 55.087 + 0.959E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.185 55.087 + 0.960E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.184 55.087 + 0.961E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.183 55.087 + 0.962E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.183 55.087 + 0.963E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.182 55.087 + 0.964E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.181 55.087 + 0.965E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.181 55.087 + 0.966E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.180 55.087 + 0.967E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.179 55.087 + 0.968E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.179 55.087 + 0.969E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.178 55.087 + 0.970E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.177 55.087 + 0.971E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.177 55.087 + 0.972E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.176 55.087 + 0.973E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.175 55.087 + 0.974E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.175 55.087 + 0.975E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.174 55.087 + 0.976E+08 -30.000 
-30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.173 55.087 + 0.977E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.173 55.087 + 0.978E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.172 55.087 + 0.979E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.172 55.087 + 0.980E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.171 55.087 + 0.981E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.170 55.087 + 0.982E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.169 55.087 + 0.983E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.169 55.087 + 0.984E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.168 55.087 + 0.985E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.167 55.087 + 0.986E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.167 55.087 + 0.987E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.166 55.087 + 0.988E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.166 55.087 + 0.989E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.165 55.087 + 0.990E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.164 55.087 + 0.991E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.164 55.087 + 0.992E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.163 55.087 + 0.993E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.162 55.087 + 0.994E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.162 55.087 + 0.995E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.161 55.087 + 0.996E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.160 55.087 + 0.997E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.159 55.087 + 0.998E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.159 55.087 + 0.999E+08 -30.000 -30.000 55.025 -30.000 -30.000 54.139 0.0 0.0 33.158 55.087 diff --git a/src/particles/supernova.h b/src/particles/supernova.h new file mode 100644 index 
000000000..409b2e56b --- /dev/null +++ b/src/particles/supernova.h @@ -0,0 +1,32 @@ +#pragma once +#if defined(PARTICLES_GPU) && defined(SUPERNOVA) + + #include "../analysis/feedback_analysis.h" + #include "../global/global.h" + #ifdef O_HIP + #include + #include + #else + #include + #include + #endif // O_HIP + +namespace supernova +{ +const int SN = 0, RESOLVED = 1, NOT_RESOLVED = 2, ENERGY = 3, MOMENTUM = 4, UNRES_ENERGY = 5; + +// supernova rate: 1SN / 100 solar masses per 36 Myr +static const Real DEFAULT_SNR = 2.8e-7; +static const Real ENERGY_PER_SN = 1e51 / MASS_UNIT * TIME_UNIT * TIME_UNIT / LENGTH_UNIT / LENGTH_UNIT; +static const Real MASS_PER_SN = 10.0; // 10 solarMasses per SN +static const Real FINAL_MOMENTUM = + 2.8e5 / LENGTH_UNIT * 1e5 * TIME_UNIT; // 2.8e5 M_s km/s * n_0^{-0.17} -> eq.(34) Kim & Ostriker (2015) +static const Real MU = 0.6; +static const Real R_SH = 0.0302; // 30.2 pc * n_0^{-0.46} -> eq.(31) Kim & Ostriker (2015) +static const Real DEFAULT_SN_END = 40000; // default value for when SNe stop (40 Myr) +static const Real DEFAULT_SN_START = 4000; // default value for when SNe start (4 Myr) + +void initState(struct Parameters* P, part_int_t n_local, Real allocation_factor = 1); +Real Cluster_Feedback(Grid3D& G, FeedbackAnalysis& sn_analysis); +} // namespace supernova +#endif // PARTICLES_GPU && SUPERNOVA diff --git a/src/reconstruction/pcm_cuda.cu b/src/reconstruction/pcm_cuda.cu index 1964ddedf..e7264ca54 100644 --- a/src/reconstruction/pcm_cuda.cu +++ b/src/reconstruction/pcm_cuda.cu @@ -1,494 +1,424 @@ /*! 
\file pcm_cuda.cu * \brief Definitions of the piecewise constant reconstruction functions */ -#ifdef CUDA -#include "../utils/gpu.hpp" #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/pcm_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/mhd_utilities.h" - -__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, int n_ghost, Real gamma, int n_fields) +__global__ void PCM_Reconstruction_1D(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int n_cells, + int n_ghost, Real gamma, int n_fields) { - // declare conserved variables for each stencil // these will be placed into registers for each thread Real d, mx, my, mz, E; - #ifdef DE +#ifdef DE Real ge; - #endif +#endif // DE - #ifdef SCALAR +#ifdef SCALAR Real scalar[NSCALARS]; - #endif +#endif // SCALAR // get a global thread ID - int xid = threadIdx.x + blockIdx.x*blockDim.x; + int xid = threadIdx.x + blockIdx.x * blockDim.x; int id; - // threads corresponding to real cells plus one ghost cell do the calculation - if (xid < n_cells-1) - { + if (xid < n_cells - 1) { // retrieve appropriate conserved variables id = xid; - d = dev_conserved[ id]; - mx = dev_conserved[ n_cells + id]; - my = dev_conserved[2*n_cells + id]; - mz = dev_conserved[3*n_cells + id]; - E = dev_conserved[4*n_cells + id]; - #ifdef SCALAR - for (int i=0; i 0) { + id = cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny); + dev_bounds_Rx[id] = d; + dev_bounds_Rx[n_cells + id] = mx; + dev_bounds_Rx[2 * n_cells + id] = my; + dev_bounds_Rx[3 * n_cells + id] = mz; + dev_bounds_Rx[4 * n_cells + id] = E; +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_Rx[(5 + i) * n_cells + id] = scalar[i]; + } +#endif // SCALAR +#ifdef MHD + dev_bounds_Rx[(grid_enum::Q_x_magnetic_y)*n_cells + id] = cellCenteredBy; + dev_bounds_Rx[(grid_enum::Q_x_magnetic_z)*n_cells + id] = 
cellCenteredBz; +#endif // MHD +#ifdef DE + dev_bounds_Rx[(n_fields - 1) * n_cells + id] = ge; +#endif // DE } - #endif - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id]; - #endif - // send values back from the kernel - dev_bounds_Lz[ id] = d; - dev_bounds_Lz[ n_cells + id] = mx; - dev_bounds_Lz[2*n_cells + id] = my; - dev_bounds_Lz[3*n_cells + id] = mz; - dev_bounds_Lz[4*n_cells + id] = E; - #ifdef SCALAR - for (int i=0; i 0) { + // Send the y-1/2 Right interface + id = cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny); + dev_bounds_Ry[id] = d; + dev_bounds_Ry[n_cells + id] = mx; + dev_bounds_Ry[2 * n_cells + id] = my; + dev_bounds_Ry[3 * n_cells + id] = mz; + dev_bounds_Ry[4 * n_cells + id] = E; +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_Ry[(5 + i) * n_cells + id] = scalar[i]; + } +#endif // SCALAR +#ifdef MHD + dev_bounds_Ry[(grid_enum::Q_y_magnetic_z)*n_cells + id] = cellCenteredBz; + dev_bounds_Ry[(grid_enum::Q_y_magnetic_x)*n_cells + id] = cellCenteredBx; +#endif // MHD +#ifdef DE + dev_bounds_Ry[(n_fields - 1) * n_cells + id] = ge; +#endif // DE } - #endif - #ifdef DE - dev_bounds_Lz[(n_fields-1)*n_cells + id] = ge; - #endif - // retrieve appropriate conserved variables - id = xid + yid*nx + (zid+1)*nx*ny; - d = dev_conserved[ id]; - mx = dev_conserved[ n_cells + id]; - my = dev_conserved[2*n_cells + id]; - mz = dev_conserved[3*n_cells + id]; - E = dev_conserved[4*n_cells + id]; - #ifdef SCALAR - for (int i=0; i 0) { + // Send the z-1/2 Right interface + id = cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny); + dev_bounds_Rz[id] = d; + dev_bounds_Rz[n_cells + id] = mx; + dev_bounds_Rz[2 * n_cells + id] = my; + dev_bounds_Rz[3 * n_cells + id] = mz; + dev_bounds_Rz[4 * n_cells + id] = E; +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_bounds_Rz[(5 + i) * n_cells + id] = scalar[i]; + } +#endif // SCALAR +#ifdef MHD + dev_bounds_Rz[(grid_enum::Q_z_magnetic_x)*n_cells + id] = cellCenteredBx; + 
dev_bounds_Rz[(grid_enum::Q_z_magnetic_y)*n_cells + id] = cellCenteredBy; +#endif // MHD +#ifdef DE + dev_bounds_Rz[(n_fields - 1) * n_cells + id] = ge; +#endif // DE } - #endif - #ifdef DE - ge = dev_conserved[(n_fields-1)*n_cells + id]; - #endif - - // send values back from the kernel - id = xid + yid*nx + zid*nx*ny; - dev_bounds_Rz[ id] = d; - dev_bounds_Rz[ n_cells + id] = mx; - dev_bounds_Rz[2*n_cells + id] = my; - dev_bounds_Rz[3*n_cells + id] = mz; - dev_bounds_Rz[4*n_cells + id] = E; - #ifdef SCALAR - for (int i=0; i + #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/plmc_cuda.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" + +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif // DE + +/*! \fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, Real dx, Real dt, Real + gamma, int dir) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plm. */ +__global__ __launch_bounds__(TPB) void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real dx, Real dt, Real gamma, int dir, int n_fields) +{ + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif + // Ensure that we are only operating on cells that will be used + if (reconstruction::Thread_Guard<2>(nx, ny, nz, xid, yid, zid)) { + return; + } + // Compute the total number of cells + int const n_cells = nx * ny * nz; -/*! 
\fn __global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using plm. */ -__global__ void PLMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) -{ - int n_cells = nx*ny*nz; + // Set the field indices for the various directions int o1, o2, o3; - if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; } - if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; - } - if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + + // load the 3-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-1. The equality checks the direction and will subtract one from the correct direction + reconstruction::Primitive const cell_imo = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+1. 
The equality checks the direction and add one to the correct direction + reconstruction::Primitive const cell_ipo = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // calculate the adiabatic sound speed in cell i + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + +// Compute the eigenvectors +#ifdef MHD + reconstruction::EigenVecs const eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::EigenVecs eigenvectors; +#endif // MHD + + // Compute the left, right, centered, and van Leer differences of the + // primitive variables Note that here L and R refer to locations relative to + // the cell center + + // left + reconstruction::Primitive const del_L = reconstruction::Compute_Slope(cell_imo, cell_i); + + // right + reconstruction::Primitive const del_R = reconstruction::Compute_Slope(cell_i, cell_ipo); + + // centered + reconstruction::Primitive const del_C = reconstruction::Compute_Slope(cell_imo, cell_ipo, 0.5); + + // Van Leer + reconstruction::Primitive const del_G = reconstruction::Van_Leer_Slope(del_L, del_R); + + // Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic const del_a_L = + reconstruction::Primitive_To_Characteristic(cell_i, del_L, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_R = + reconstruction::Primitive_To_Characteristic(cell_i, del_R, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_C = + 
reconstruction::Primitive_To_Characteristic(cell_i, del_C, eigenvectors, sound_speed, sound_speed_squared, gamma); + + reconstruction::Characteristic const del_a_G = + reconstruction::Primitive_To_Characteristic(cell_i, del_G, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Apply monotonicity constraints to the differences in the characteristic variables and project the monotonized + // difference in the characteristic variables back onto the primitive variables Stone Eqn 39 + reconstruction::Primitive del_m_i = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_i, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, + sound_speed_squared, gamma); + + // Compute the left and right interface values using the monotonized difference in the primitive variables + reconstruction::Primitive interface_L_iph = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, 1.0); + reconstruction::Primitive interface_R_imh = reconstruction::Calc_Interface_Linear(cell_i, del_m_i, -1.0); + + // Limit the interfaces + reconstruction::Plm_Limit_Interfaces(interface_L_iph, interface_R_imh, cell_imo, cell_i, cell_ipo); + +#ifndef VL + + Real const dtodx = dt / dx; + + // Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; + + // Integrate linear interpolation function over domain of dependence + // defined by max(min) eigenvalue + Real qx = -0.5 * fmin(lambda_m, 0.0) * dtodx; + interface_R_imh.density = interface_R_imh.density + qx * del_m_i.density; + interface_R_imh.velocity_x = interface_R_imh.velocity_x + qx * del_m_i.velocity_x; + interface_R_imh.velocity_y = interface_R_imh.velocity_y + qx * del_m_i.velocity_y; + interface_R_imh.velocity_z = interface_R_imh.velocity_z + qx * 
del_m_i.velocity_z; + interface_R_imh.pressure = interface_R_imh.pressure + qx * del_m_i.pressure; + + qx = 0.5 * fmax(lambda_p, 0.0) * dtodx; + interface_L_iph.density = interface_L_iph.density - qx * del_m_i.density; + interface_L_iph.velocity_x = interface_L_iph.velocity_x - qx * del_m_i.velocity_x; + interface_L_iph.velocity_y = interface_L_iph.velocity_y - qx * del_m_i.velocity_y; + interface_L_iph.velocity_z = interface_L_iph.velocity_z - qx * del_m_i.velocity_z; + interface_L_iph.pressure = interface_L_iph.pressure - qx * del_m_i.pressure; + + #ifdef DE + interface_R_imh.gas_energy = interface_R_imh.gas_energy + qx * del_m_i.gas_energy; + interface_L_iph.gas_energy = interface_L_iph.gas_energy - qx * del_m_i.gas_energy; + #endif // DE + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] = interface_R_imh.scalar[i] + qx * del_m_i.scalar[i]; + interface_L_iph.scalar[i] = interface_L_iph.scalar[i] - qx * del_m_i.scalar[i]; } + #endif // SCALAR + + // Perform the characteristic tracing + // Stone Eqns 42 & 43 - // declare primitive variables for each stencil - // these will be placed into registers for each thread - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - - // declare other variables to be used - Real a_i; - Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; - Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; - Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; - Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G; - Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; - Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; - Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; - Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; - Real lim_slope_a, lim_slope_b; - Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; - Real d_L_iph, vx_L_iph, 
vy_L_iph, vz_L_iph, p_L_iph; - Real d_R_imh, vx_R_imh, vy_R_imh, vz_R_imh, p_R_imh; - Real C; - #ifndef VL - Real dtodx = dt/dx; - Real lambda_m, lambda_0, lambda_p; - Real qx; - Real lamdiff; - Real sum_0, sum_1, sum_2, sum_3, sum_4; - #endif // not VL + // left-hand interface value, i+1/2 + Real sum_0 = 0.0, sum_1 = 0.0, sum_2 = 0.0, sum_3 = 0.0, sum_4 = 0.0; #ifdef DE - Real ge_i, ge_imo, ge_ipo; - Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; - Real del_ge_m_i; - Real ge_L_iph, ge_R_imh; - Real E, E_kin, dge; - #ifndef VL - Real sum_ge; - #endif // not VL - #endif + Real sum_ge = 0; + #endif // DE #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; - Real del_scalar_m_i[NSCALARS]; - Real scalar_L_iph[NSCALARS], scalar_R_imh[NSCALARS]; - #ifndef VL Real sum_scalar[NSCALARS]; - #endif // not VL - #endif - - // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; - int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - - int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 1; xe = nx-2; - ys = 0; ye = ny; - zs = 0; ze = nz; + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] = 0.0; } - if (dir == 1) { - xs = 0; xe = nx; - ys = 1; ye = ny-2; - zs = 0; ze = nz; + #endif // SCALAR + if (lambda_m >= 0) { + Real lamdiff = lambda_p - lambda_m; + + sum_0 += lamdiff * + (-cell_i.density * del_m_i.velocity_x / (2 * sound_speed) + del_m_i.pressure / (2 * sound_speed_squared)); + sum_1 += lamdiff * (del_m_i.velocity_x / 2.0 - del_m_i.pressure / (2 * sound_speed * cell_i.density)); + sum_4 += lamdiff * (-cell_i.density * del_m_i.velocity_x * sound_speed / 2.0 + del_m_i.pressure / 2.0); } - if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 1; ze = nz-2; - } - - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && 
zid >= zs && zid < ze) - { - // load the 3-cell stencil into registers - // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); - #ifdef SCALAR - for (int i=0; i 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } - - #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5*(ge_ipo - ge_imo); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } - } - #endif - - - // Project the left, right, centered and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A - del_a_0_L = -d_i * del_vx_L / (2*a_i) 
+ del_p_L / (2*a_i*a_i); - del_a_1_L = del_d_L - del_p_L / (a_i*a_i); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = d_i * del_vx_L / (2*a_i) + del_p_L / (2*a_i*a_i); - - del_a_0_R = -d_i * del_vx_R / (2*a_i) + del_p_R / (2*a_i*a_i); - del_a_1_R = del_d_R - del_p_R / (a_i*a_i); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = d_i * del_vx_R / (2*a_i) + del_p_R / (2*a_i*a_i); - - del_a_0_C = -d_i * del_vx_C / (2*a_i) + del_p_C / (2*a_i*a_i); - del_a_1_C = del_d_C - del_p_C / (a_i*a_i); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = d_i * del_vx_C / (2*a_i) + del_p_C / (2*a_i*a_i); - - del_a_0_G = -d_i * del_vx_G / (2*a_i) + del_p_G / (2*a_i*a_i); - del_a_1_G = del_d_G - del_p_G / (a_i*a_i); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = d_i * del_vx_G / (2*a_i) + del_p_G / (2*a_i*a_i); - - - // Apply monotonicity constraints to the differences in the characteristic variables - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L*del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin(2.0*lim_slope_a, lim_slope_b); - } - if (del_a_1_L*del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin(2.0*lim_slope_a, lim_slope_b); - } - if (del_a_2_L*del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin(2.0*lim_slope_a, lim_slope_b); - } - if (del_a_3_L*del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin(2.0*lim_slope_a, lim_slope_b); - } - if (del_a_4_L*del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); 
- lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin(2.0*lim_slope_a, lim_slope_b); - } - #ifdef DE - del_ge_m_i = 0.0; - if (del_ge_L*del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin(2.0*lim_slope_a, lim_slope_b); - } - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin(2.0*lim_slope_a, lim_slope_b); - } - } - #endif - - - - // Project the monotonized difference in the characteristic variables back onto the - // primitive variables - // Stone Eqn 39 - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a_i*del_a_0_m / d_i + a_i* del_a_4_m / d_i; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = a_i*a_i*del_a_0_m + a_i*a_i*del_a_4_m; - - - // Compute the left and right interface values using the monotonized difference in the - // primitive variables - - d_R_imh = d_i - 0.5*del_d_m_i; - vx_R_imh = vx_i - 0.5*del_vx_m_i; - vy_R_imh = vy_i - 0.5*del_vy_m_i; - vz_R_imh = vz_i - 0.5*del_vz_m_i; - p_R_imh = p_i - 0.5*del_p_m_i; - - d_L_iph = d_i + 0.5*del_d_m_i; - vx_L_iph = vx_i + 0.5*del_vx_m_i; - vy_L_iph = vy_i + 0.5*del_vy_m_i; - vz_L_iph = vz_i + 0.5*del_vz_m_i; - p_L_iph = p_i + 0.5*del_p_m_i; - - #ifdef DE - ge_R_imh = ge_i - 0.5*del_ge_m_i; - ge_L_iph = ge_i + 0.5*del_ge_m_i; - #endif - #ifdef SCALAR - for (int i=0; i= 0) { + Real lamdiff = lambda_p - lambda_0; - - // Perform the characteristic tracing - // Stone Eqns 42 & 43 - - // left-hand interface value, i+1/2 - sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0; - #ifdef DE - sum_ge = 0; - #endif - #ifdef SCALAR - for (int i=0; i= 0) - { - lamdiff = lambda_p - lambda_m; - - sum_0 += lamdiff * (-d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i)); - 
sum_1 += lamdiff * (del_vx_m_i/2.0 - del_p_m_i/(2*a_i*d_i)); - sum_4 += lamdiff * (-d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0); - } - if (lambda_0 >= 0) - { - lamdiff = lambda_p - lambda_0; - - sum_0 += lamdiff * (del_d_m_i - del_p_m_i/(a_i*a_i)); - sum_2 += lamdiff * del_vy_m_i; - sum_3 += lamdiff * del_vz_m_i; - #ifdef DE - sum_ge += lamdiff * del_ge_m_i; - #endif - #ifdef SCALAR - for (int i=0; i= 0) - { - lamdiff = lambda_p - lambda_p; - - sum_0 += lamdiff * (d_i*del_vx_m_i/(2*a_i) + del_p_m_i/(2*a_i*a_i)); - sum_1 += lamdiff * (del_vx_m_i/2.0 + del_p_m_i/(2*a_i*d_i)); - sum_4 += lamdiff * (d_i*del_vx_m_i*a_i/2.0 + del_p_m_i/2.0); + sum_0 += lamdiff * (del_m_i.density - del_m_i.pressure / (sound_speed_squared)); + sum_2 += lamdiff * del_m_i.velocity_y; + sum_3 += lamdiff * del_m_i.velocity_z; + #ifdef DE + sum_ge += lamdiff * del_m_i.gas_energy; + #endif // DE + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += lamdiff * del_m_i.scalar[i]; } + #endif // SCALAR + } + if (lambda_p >= 0) { + Real lamdiff = lambda_p - lambda_p; - // add the corrections to the initial guesses for the interface values - d_L_iph += 0.5*dtodx*sum_0; - vx_L_iph += 0.5*dtodx*sum_1; - vy_L_iph += 0.5*dtodx*sum_2; - vz_L_iph += 0.5*dtodx*sum_3; - p_L_iph += 0.5*dtodx*sum_4; - #ifdef DE - ge_L_iph += 0.5*dtodx*sum_ge; - #endif - #ifdef SCALAR - for (int i=0; i +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include + +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/plmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" + +TEST(tHYDROPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ +#ifndef VL + std::cerr << "Warning: The tHYDROPlmcReconstructor.CorrectInputExpectCorrectOutput only supports the Van Leer (VL) " + "integrator" + << std::endl; + return; +#endif // VL + // 
Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 5; + size_t const ny = 4; + size_t const nz = 4; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (Real &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{26, 3.8877922383184833}, + {27, 0.70033864721549188}, + {106, 5.6625525038177784}, + {107, 3.0633780053857027}, + {186, 4.0069556576401011}, + {187, 2.1015872413794123}, + {266, 5.1729859852329314}, + {267, 3.9675148506537838}, + {346, 9.6301414677176531}, + {347, 21.091316282933843}}, + {{21, 0.74780807318015607}, + {37, 0.19457128219588618}, + {101, 5.6515522777659895}, + {117, 4.4286255636679313}, + {181, 0.13115998072061905}, + {197, 2.2851440769830953}, + {261, 1.5834637771067519}, + {277, 2.697375839048191}, + {341, 23.043749364531674}, + {357, 82.515887983144168}}, + {{25, 2.2863650183226212}, + {29, 1.686415421301841}, + {105, 0.72340346106443465}, + {109, 5.9563546443402542}, + {185, 3.6128571662018358}, + {189, 5.3735653401079038}, + {265, 0.95177493689267167}, + {269, 0.46056494878491938}, + {345, 3.1670194578067843}, + {349, 19.142817472509272}}}; + + std::vector> fiducial_interface_right = + + {{{25, 3.8877922383184833}, + {26, 0.70033864721549188}, + {105, 1.594778794367564}, + {106, 3.0633780053857027}, + {185, 4.0069556576401011}, + {186, 2.1015872413794123}, + {265, 1.7883678016935782}, + {266, 3.9675148506537838}, + {345, 2.8032969746372531}, + {346, 21.091316282933843}}, + {{17, 0.43265217076853835}, + {33, 
0.19457128219588618}, + {97, 3.2697645945288754}, + {113, 4.4286255636679313}, + {177, 0.07588397666718491}, + {193, 2.2851440769830953}, + {257, 0.91612950577699748}, + {273, 2.697375839048191}, + {337, 13.332201861384396}, + {353, 82.515887983144168}}, + {{5, 2.2863650183226212}, + {9, 1.686415421301841}, + {85, 0.72340346106443465}, + {89, 1.77925054463361}, + {165, 5.3997753452111859}, + {169, 1.4379190463124141}, + {245, 0.95177493689267167}, + {249, 0.46056494878491938}, + {325, 6.6889498465051398}, + {329, 1.6145084086614285}}} + + ; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Assign the shape + size_t nx_rot, ny_rot, nz_rot; + switch (direction) { + case 0: + nx_rot = nx; + ny_rot = ny; + nz_rot = nz; + break; + case 1: + nx_rot = ny; + ny_rot = nz; + nz_rot = nx; + break; + case 2: + nx_rot = nz; + ny_rot = nx; + nz_rot = ny; + break; + } + + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); + + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx_rot, ny_rot, nz_rot, dx, dt, gamma, direction, n_fields); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 
0.0 + : fiducial_interface_left.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + + // if (test_val != 0.0) std::cout << "{" << i << ", " << to_string_exact(test_val) << "}," << std::endl; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} + +TEST(tMHDPlmcReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 4, ny = nx, nz = nx; + size_t const n_fields = 8; + size_t const n_cells_grid = nx * ny * nz * n_fields; + size_t const n_cells_interface = nx * ny * nz * (n_fields - 1); + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. 
Fill host grid with random values and randomly assign maximum value + std::vector host_grid(n_cells_grid); + for (Real &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{21, 0.59023012197434721}, + {85, 3.0043379408547275}, + {149, 2.6320759184913625}, + {213, 0.9487867623146744}, + {277, 18.551193003661723}, + {341, 1.8587936590169301}, + {405, 2.1583975283044725}}, + {{21, 0.73640639402573249}, + {85, 3.3462413154443715}, + {149, 2.1945584994458125}, + {213, 1.1837630990406585}, + {277, 17.570011907061254}, + {341, 2.1583975283044725}, + {405, 1.7033818819502551}}, + {{21, 0.25340904981266843}, + {85, 2.0441984720128734}, + {149, 1.9959059157695584}, + {213, 0.45377591914009824}, + {277, 24.018953780483471}, + {341, 1.7033818819502551}, + {405, 1.8587936590169301}}}; + std::vector> fiducial_interface_right = {{{20, 0.59023012197434721}, + {84, 3.0043379408547275}, + {148, 2.6320759184913625}, + {212, 0.9487867623146744}, + {276, 22.111134849009044}, + {340, 1.8587936590169301}, + {404, 2.1583975283044725}}, + { + {17, 0.44405384992296193}, + {81, 2.5027813113931279}, + {145, 2.6371119205792346}, + {209, 1.0210845222961809}, + {273, 21.353253570231175}, + {337, 2.1634182515826184}, + {401, 1.7033818819502551}, + }, + { + {5, 0.92705119413602599}, + {69, 1.9592598982258778}, + {133, 0.96653490574340428}, + {197, 1.3203867992383289}, + {261, 7.9217487636977353}, + {325, 1.8629714367312684}, + {389, 1.8587936590169301}, + }}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(n_cells_interface, true); + cuda_utilities::DeviceVector dev_interface_right(n_cells_interface, true); + + // Launch kernel + hipLaunchKernelGGL(PLMC_cuda, dev_grid.size(), 
1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction, n_fields); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < dev_interface_right.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} diff --git a/src/reconstruction/plmp_cuda.cu b/src/reconstruction/plmp_cuda.cu index 2a6b637f7..e8cfa0d09 100644 --- a/src/reconstruction/plmp_cuda.cu +++ b/src/reconstruction/plmp_cuda.cu @@ -1,34 +1,42 @@ /*! \file plmp_cuda.cu * \brief Definitions of the piecewise linear reconstruction functions for with limiting in the primitive variables. */ -#ifdef CUDA -#include "../utils/gpu.hpp" #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/plmp_cuda.h" +#include "../utils/gpu.hpp" -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" #endif - -/*! 
\fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using plm. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plm. */ +__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } // declare primitive variables in the stencil @@ -44,193 +52,213 @@ __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real mx_L, my_L, mz_L, E_L; Real mx_R, my_R, mz_R, E_R; - #ifdef DE +#ifdef DE Real ge_i, ge_imo, ge_ipo, ge_L, ge_R, dge_L, dge_R, E_kin, E, dge; - #endif - #ifdef SCALAR +#endif // DE +#ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS], dscalar_L[NSCALARS], dscalar_R[NSCALARS]; - #endif +#endif // SCALAR - #ifndef VL //Don't use velocities to reconstruct when using VL - Real dtodx = dt/dx; +#ifndef VL // Don't use velocities to reconstruct when using VL 
+ Real dtodx = dt / dx; Real dfl, dfr, mxfl, mxfr, myfl, myfr, mzfl, mzfr, Efl, Efr; #ifdef DE Real gefl, gefr; - #endif + #endif // DE #ifdef SCALAR Real scalarfl[NSCALARS], scalarfr[NSCALARS]; - #endif - #endif + #endif // SCALAR +#endif // VL // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; if (dir == 0) { - xs = 1; xe = nx-2; - ys = 0; ye = ny; - zs = 0; ze = nz; + xs = 1; + xe = nx - 2; + ys = 0; + ye = ny; + zs = 0; + ze = nz; } if (dir == 1) { - xs = 0; xe = nx; - ys = 1; ye = ny-2; - zs = 0; ze = nz; + xs = 0; + xe = nx; + ys = 1; + ye = ny - 2; + zs = 0; + ze = nz; } if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 1; ze = nz-2; + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 1; + ze = nz - 2; } - - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { + if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { // load the 3-cell stencil into registers // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); - #ifdef 
SCALAR - for (int i=0; i 0.0) { del_q_G = 2.0*del_q_L*del_q_R / (del_q_L+del_q_R); } - else { del_q_G = 0.0; } + if (del_q_L * del_q_R > 0.0) { + del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R); + } else { + del_q_G = 0.0; + } // Monotonize the differences lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R)); lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G)); // Minmod limiter - //del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); + // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); // Van Leer limiter - del_q_m = sgn_CUDA(del_q_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - + del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); // Calculate the left and right interface values using the limited slopes - *q_L = q_i - 0.5*del_q_m; - *q_R = q_i + 0.5*del_q_m; - + *q_L = q_i - 0.5 * del_q_m; + *q_R = q_i + 0.5 * del_q_m; } - - -#endif //CUDA diff --git a/src/reconstruction/plmp_cuda.h b/src/reconstruction/plmp_cuda.h index 9cf5f01a3..34faa14df 100644 --- a/src/reconstruction/plmp_cuda.h +++ b/src/reconstruction/plmp_cuda.h @@ -1,25 +1,24 @@ /*! \file plmp_cuda.h * \brief Declarations of the cuda plmp kernels. */ -#ifdef CUDA - #ifndef PLMP_CUDA_H #define PLMP_CUDA_H - #include "../global/global.h" -/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using plmp. */ -__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); - - -/*! 
\fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using linear reconstruction - in the primitive variables with Van Leer or Minmod slope limiting. */ +/*! \fn __global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using plmp. */ +__global__ void PLMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); + +/*! \fn __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, + Real *q_L, Real *q_R) + * \brief Calculates the left and right interface values for a cell using + linear reconstruction in the primitive variables with Van Leer or Minmod slope + limiting. */ __device__ void Interface_Values_PLM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R); - -#endif // PLMP_CUDA_H -#endif // CUDA +#endif // PLMP_CUDA_H diff --git a/src/reconstruction/ppmc_cuda.cu b/src/reconstruction/ppmc_cuda.cu index 2ca1b62df..4db993d70 100644 --- a/src/reconstruction/ppmc_cuda.cu +++ b/src/reconstruction/ppmc_cuda.cu @@ -1,1101 +1,696 @@ /*! \file ppmc_cuda.cu - * \brief Functions definitions for the ppm kernels, using characteristic tracing. - Written following Stone et al. 2008. */ -#ifdef CUDA -#ifdef PPMC + * \brief Functions definitions for the ppm kernels, using characteristic + tracing. Written following Stone et al. 2008. 
*/ -#include "../utils/gpu.hpp" #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../reconstruction/ppmc_cuda.h" - -#ifdef DE //PRESSURE_DE +#include "../reconstruction/reconstruction.h" +#include "../utils/gpu.hpp" #include "../utils/hydro_utilities.h" -#endif +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" +#endif -/*! \fn void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +// ===================================================================================================================== +/*! + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using ppm. 
*/ +__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir) { - int n_cells = nx*ny*nz; - int o1, o2, o3; - if (dir == 0 ) { - o1 = 1; o2 = 2; o3 = 3; - } - if (dir == 1 ) { - o1 = 2; o2 = 3; o3 = 1; - } - if (dir == 2 ) { - o1 = 3; o2 = 1; o3 = 2; - } + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); - // declare primitive variables for each stencil - // these will be placed into registers for each thread - Real d_i, vx_i, vy_i, vz_i, p_i; - Real d_imo, vx_imo, vy_imo, vz_imo, p_imo; - Real d_ipo, vx_ipo, vy_ipo, vz_ipo, p_ipo; - Real d_imt, vx_imt, vy_imt, vz_imt, p_imt; - Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; - - // declare other variables to be used - Real a; - Real del_d_L, del_vx_L, del_vy_L, del_vz_L, del_p_L; - Real del_d_R, del_vx_R, del_vy_R, del_vz_R, del_p_R; - Real del_d_C, del_vx_C, del_vy_C, del_vz_C, del_p_C; - Real del_d_G, del_vx_G, del_vy_G, del_vz_G, del_p_G; - Real del_a_0_L, del_a_1_L, del_a_2_L, del_a_3_L, del_a_4_L; - Real del_a_0_R, del_a_1_R, del_a_2_R, del_a_3_R, del_a_4_R; - Real del_a_0_C, del_a_1_C, del_a_2_C, del_a_3_C, del_a_4_C; - Real del_a_0_G, del_a_1_G, del_a_2_G, del_a_3_G, del_a_4_G; - Real del_a_0_m, del_a_1_m, del_a_2_m, del_a_3_m, del_a_4_m; - Real lim_slope_a, lim_slope_b; - Real del_d_m_imo, del_vx_m_imo, del_vy_m_imo, del_vz_m_imo, del_p_m_imo; - Real del_d_m_i, del_vx_m_i, del_vy_m_i, del_vz_m_i, del_p_m_i; - Real del_d_m_ipo, del_vx_m_ipo, del_vy_m_ipo, del_vz_m_ipo, del_p_m_ipo; - Real d_L, vx_L, vy_L, vz_L, p_L; - Real d_R, vx_R, vy_R, vz_R, p_R; - - // #ifdef CTU - #ifndef VL - Real dtodx = dt/dx; - Real d_6, vx_6, vy_6, vz_6, p_6; - Real lambda_m, lambda_0, lambda_p; - Real lambda_max, lambda_min; - Real A, B, C, D; - Real chi_1, chi_2, chi_3, chi_4, chi_5; - Real sum_1, sum_2, sum_3, sum_4, 
sum_5; - #endif //CTU - - #ifdef DE - Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt; - Real del_ge_L, del_ge_R, del_ge_C, del_ge_G; - Real del_ge_m_imo, del_ge_m_i, del_ge_m_ipo; - Real ge_L, ge_R; - Real E_kin, E, dge; - // #ifdef CTU - #ifndef VL - Real chi_ge, sum_ge, ge_6; - #endif - #endif - #ifdef SCALAR - Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; - Real del_scalar_L[NSCALARS], del_scalar_R[NSCALARS], del_scalar_C[NSCALARS], del_scalar_G[NSCALARS]; - Real del_scalar_m_imo[NSCALARS], del_scalar_m_i[NSCALARS], del_scalar_m_ipo[NSCALARS]; - Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - // #ifdef CTU - #ifndef VL - Real chi_scalar[NSCALARS], sum_scalar[NSCALARS], scalar_6[NSCALARS]; - #endif - #endif + if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { + return; + } + // Compute the total number of cells + int const n_cells = nx * ny * nz; - // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; - - int xs, xe, ys, ye, zs, ze; - if (dir == 0) { - xs = 2; xe = nx-3; - ys = 0; ye = ny; - zs = 0; ze = nz; - } - if (dir == 1) { - xs = 0; xe = nx; - ys = 2; ye = ny-3; - zs = 0; ze = nz; - } - if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 2; ze = nz-3; + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; } - if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { - // load the 5-cell stencil into registers - // cell i - 
id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); - #ifdef DE - ge_i = dge / d_i; - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } - - #ifdef DE - del_ge_L = ge_imo - ge_imt; - del_ge_R = ge_i - ge_imo; - del_ge_C = 0.5*(ge_i - ge_imt); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } - } - #endif + // load the 5-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + // cell i-1. 
The equality checks check the direction and subtracts one from the direction + // im1 stands for "i minus 1" + reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - // Step 3 - Project the left, right, centered and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A + // cell i+1. The equality checks check the direction and adds one to the direction + // ip1 stands for "i plus 1" + reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - del_a_0_L = -0.5*d_imo*del_vx_L/a + 0.5*del_p_L/(a*a); - del_a_1_L = del_d_L - del_p_L/(a*a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5*d_imo*del_vx_L/a + 0.5*del_p_L/(a*a); + // cell i-2. The equality checks check the direction and subtracts two from the direction + // im2 stands for "i minus 2" + reconstruction::Primitive const cell_im2 = + reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), + zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - del_a_0_R = -0.5*d_imo*del_vx_R/a + 0.5*del_p_R/(a*a); - del_a_1_R = del_d_R - del_p_R/(a*a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5*d_imo*del_vx_R/a + 0.5*del_p_R/(a*a); + // cell i+2.
The equality checks check the direction and adds two to the direction + // ip2 stands for "i plus 2" + reconstruction::Primitive const cell_ip2 = + reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), + zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); - del_a_0_C = -0.5*d_imo*del_vx_C/a + 0.5*del_p_C/(a*a); - del_a_1_C = del_d_C - del_p_C/(a*a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5*d_imo*del_vx_C/a + 0.5*del_p_C/(a*a); + // Steps 2 - 5 are repeated for cell i-1, i, and i+1 - del_a_0_G = -0.5*d_imo*del_vx_G/a + 0.5*del_p_G/(a*a); - del_a_1_G = del_d_G - del_p_G/(a*a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5*d_imo*del_vx_G/a + 0.5*del_p_G/(a*a); + // =============== + // Cell i-1 slopes + // =============== + // calculate the adiabatic sound speed in cell im1 + Real sound_speed = hydro_utilities::Calc_Sound_Speed(cell_im1.pressure, cell_im1.density, gamma); + // this isn't actually used and the compiler should optimize it away but since this is the only reconstruction + // function that won't use it, it was easier to add it here as an unused variable + reconstruction::EigenVecs eigenvector; - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables - // Stone Eqn 38 + // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables.
Note that here L + // and R refer to locations relative to the cell center Stone Eqn 36 - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + // left + reconstruction::Primitive del_L = reconstruction::Compute_Slope(cell_im2, cell_im1); - if (del_a_0_L*del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_1_L*del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_2_L*del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_3_L*del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_4_L*del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - #ifdef DE - if (del_ge_L*del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_imo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_ge_m_imo = 0.0; - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_imo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_scalar_m_imo[i] = 0.0; - } - #endif 
- - - // Step 5 - Project the monotonized difference in the characteristic variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_imo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_imo = -a*del_a_0_m/d_imo + a*del_a_4_m/d_imo; - del_vy_m_imo = del_a_2_m; - del_vz_m_imo = del_a_3_m; - del_p_m_imo = a*a*del_a_0_m + a*a*del_a_4_m; - - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables - // Note that here L and R refer to locations relative to the cell center - // Stone Eqn 36 - - // calculate the adiabatic sound speed in cell i - a = sqrt(gamma*p_i/d_i); - - // left - del_d_L = d_i - d_imo; - del_vx_L = vx_i - vx_imo; - del_vy_L = vy_i - vy_imo; - del_vz_L = vz_i - vz_imo; - del_p_L = p_i - p_imo; - - // right - del_d_R = d_ipo - d_i; - del_vx_R = vx_ipo - vx_i; - del_vy_R = vy_ipo - vy_i; - del_vz_R = vz_ipo - vz_i; - del_p_R = p_ipo - p_i; - - // centered - del_d_C = 0.5*(d_ipo - d_imo); - del_vx_C = 0.5*(vx_ipo - vx_imo); - del_vy_C = 0.5*(vy_ipo - vy_imo); - del_vz_C = 0.5*(vz_ipo - vz_imo); - del_p_C = 0.5*(p_ipo - p_imo); - - // van Leer - if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } - - #ifdef DE - del_ge_L = ge_i - ge_imo; - del_ge_R = ge_ipo - ge_i; - del_ge_C = 0.5*(ge_ipo - ge_imo); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif - - #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 
2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } - } - #endif - - // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A - - del_a_0_L = -0.5*d_i*del_vx_L/a + 0.5*del_p_L/(a*a); - del_a_1_L = del_d_L - del_p_L/(a*a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5*d_i*del_vx_L/a + 0.5*del_p_L/(a*a); - - del_a_0_R = -0.5*d_i*del_vx_R/a + 0.5*del_p_R/(a*a); - del_a_1_R = del_d_R - del_p_R/(a*a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5*d_i*del_vx_R/a + 0.5*del_p_R/(a*a); - - del_a_0_C = -0.5*d_i*del_vx_C/a + 0.5*del_p_C/(a*a); - del_a_1_C = del_d_C - del_p_C/(a*a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5*d_i*del_vx_C/a + 0.5*del_p_C/(a*a); - - del_a_0_G = -0.5*d_i*del_vx_G/a + 0.5*del_p_G/(a*a); - del_a_1_G = del_d_G - del_p_G/(a*a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5*d_i*del_vx_G/a + 0.5*del_p_G/(a*a); - - - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables - // Stone Eqn 38 - - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; - - if (del_a_0_L*del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_1_L*del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_2_L*del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = 
sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_3_L*del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_4_L*del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - #ifdef DE - if (del_ge_L*del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_i = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_ge_m_i = 0.0; - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), fabs(del_scalar_G[i])); - del_scalar_m_i[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_scalar_m_i[i] = 0.0; - } - #endif - - - // Step 5 - Project the monotonized difference in the characteristic variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_i = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_i = -a*del_a_0_m/d_i + a*del_a_4_m/d_i; - del_vy_m_i = del_a_2_m; - del_vz_m_i = del_a_3_m; - del_p_m_i = a*a*del_a_0_m + a*a*del_a_4_m; - - - // Step 2 - Compute the left, right, centered, and van Leer differences of the primitive variables - // Note that here L and R refer to locations relative to the cell center - // Stone Eqn 36 - - - // calculate the adiabatic sound speed in cell ipo - a = sqrt(gamma*p_ipo/d_ipo); - - // left - del_d_L = d_ipo - d_i; - del_vx_L = vx_ipo - vx_i; - del_vy_L = vy_ipo - vy_i; - del_vz_L = vz_ipo - vz_i; - del_p_L = p_ipo - p_i; - - // right - del_d_R = d_ipt - d_ipo; - del_vx_R = vx_ipt - vx_ipo; - del_vy_R = vy_ipt - vy_ipo; - 
del_vz_R = vz_ipt - vz_ipo; - del_p_R = p_ipt - p_ipo; - - // centered - del_d_C = 0.5*(d_ipt - d_i); - del_vx_C = 0.5*(vx_ipt- vx_i); - del_vy_C = 0.5*(vy_ipt - vy_i); - del_vz_C = 0.5*(vz_ipt - vz_i); - del_p_C = 0.5*(p_ipt - p_i); - - // van Leer - if (del_d_L*del_d_R > 0.0) { del_d_G = 2.0*del_d_L*del_d_R / (del_d_L+del_d_R); } - else { del_d_G = 0.0; } - if (del_vx_L*del_vx_R > 0.0) { del_vx_G = 2.0*del_vx_L*del_vx_R / (del_vx_L+del_vx_R); } - else { del_vx_G = 0.0; } - if (del_vy_L*del_vy_R > 0.0) { del_vy_G = 2.0*del_vy_L*del_vy_R / (del_vy_L+del_vy_R); } - else { del_vy_G = 0.0; } - if (del_vz_L*del_vz_R > 0.0) { del_vz_G = 2.0*del_vz_L*del_vz_R / (del_vz_L+del_vz_R); } - else { del_vz_G = 0.0; } - if (del_p_L*del_p_R > 0.0) { del_p_G = 2.0*del_p_L*del_p_R / (del_p_L+del_p_R); } - else { del_p_G = 0.0; } - - #ifdef DE - del_ge_L = ge_ipo - ge_i; - del_ge_R = ge_ipt - ge_ipo; - del_ge_C = 0.5*(ge_ipt- ge_i); - if (del_ge_L*del_ge_R > 0.0) { del_ge_G = 2.0*del_ge_L*del_ge_R / (del_ge_L+del_ge_R); } - else { del_ge_G = 0.0; } - #endif - - #ifdef SCALAR - for (int i=0; i 0.0) { del_scalar_G[i] = 2.0*del_scalar_L[i]*del_scalar_R[i] / (del_scalar_L[i]+del_scalar_R[i]); } - else { del_scalar_G[i] = 0.0; } - } - #endif + // right + reconstruction::Primitive del_R = reconstruction::Compute_Slope(cell_im1, cell_i); + // centered + reconstruction::Primitive del_C = reconstruction::Compute_Slope(cell_im2, cell_i, 0.5); - // Step 3 - Project the left, right, centered, and van Leer differences onto the characteristic variables - // Stone Eqn 37 (del_a are differences in characteristic variables, see Stone for notation) - // Use the eigenvectors given in Stone 2008, Appendix A + // Van Leer + reconstruction::Primitive del_G = reconstruction::Van_Leer_Slope(del_L, del_R); - del_a_0_L = -0.5*d_ipo*del_vx_L/a + 0.5*del_p_L/(a*a); - del_a_1_L = del_d_L - del_p_L/(a*a); - del_a_2_L = del_vy_L; - del_a_3_L = del_vz_L; - del_a_4_L = 0.5*d_ipo*del_vx_L/a + 0.5*del_p_L/(a*a); + // 
Step 3 - Project the left, right, centered and van Leer differences onto the + // characteristic variables Stone Eqn 37 (del_a are differences in + // characteristic variables, see Stone for notation) Use the eigenvectors + // given in Stone 2008, Appendix A + reconstruction::Characteristic del_a_L = reconstruction::Primitive_To_Characteristic( + cell_im1, del_L, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - del_a_0_R = -0.5*d_ipo*del_vx_R/a + 0.5*del_p_R/(a*a); - del_a_1_R = del_d_R - del_p_R/(a*a); - del_a_2_R = del_vy_R; - del_a_3_R = del_vz_R; - del_a_4_R = 0.5*d_ipo*del_vx_R/a + 0.5*del_p_R/(a*a); + reconstruction::Characteristic del_a_R = reconstruction::Primitive_To_Characteristic( + cell_im1, del_R, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - del_a_0_C = -0.5*d_ipo*del_vx_C/a + 0.5*del_p_C/(a*a); - del_a_1_C = del_d_C - del_p_C/(a*a); - del_a_2_C = del_vy_C; - del_a_3_C = del_vz_C; - del_a_4_C = 0.5*d_ipo*del_vx_C/a + 0.5*del_p_C/(a*a); + reconstruction::Characteristic del_a_C = reconstruction::Primitive_To_Characteristic( + cell_im1, del_C, eigenvector, sound_speed, sound_speed * sound_speed, gamma); - del_a_0_G = -0.5*d_ipo*del_vx_G/a + 0.5*del_p_G/(a*a); - del_a_1_G = del_d_G - del_p_G/(a*a); - del_a_2_G = del_vy_G; - del_a_3_G = del_vz_G; - del_a_4_G = 0.5*d_ipo*del_vx_G/a + 0.5*del_p_G/(a*a); + reconstruction::Characteristic del_a_G = reconstruction::Primitive_To_Characteristic( + cell_im1, del_G, eigenvector, sound_speed, sound_speed * sound_speed, gamma); + // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables + // Step 5 - and project the monotonized difference in the characteristic variables back onto the primitive variables + // Stone Eqn 39 + reconstruction::Primitive const del_m_im1 = reconstruction::Monotonize_Characteristic_Return_Primitive( + cell_im1, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvector, sound_speed, + sound_speed * 
sound_speed, gamma); - // Step 4 - Apply monotonicity constraints to the differences in the characteristic variables - // Stone Eqn 38 + // ============= + // Cell i slopes + // ============= - del_a_0_m = del_a_1_m = del_a_2_m = del_a_3_m = del_a_4_m = 0.0; + // calculate the adiabatic sound speed in cell i + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); - if (del_a_0_L*del_a_0_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_0_L), fabs(del_a_0_R)); - lim_slope_b = fmin(fabs(del_a_0_C), fabs(del_a_0_G)); - del_a_0_m = sgn_CUDA(del_a_0_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_1_L*del_a_1_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_1_L), fabs(del_a_1_R)); - lim_slope_b = fmin(fabs(del_a_1_C), fabs(del_a_1_G)); - del_a_1_m = sgn_CUDA(del_a_1_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_2_L*del_a_2_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_2_L), fabs(del_a_2_R)); - lim_slope_b = fmin(fabs(del_a_2_C), fabs(del_a_2_G)); - del_a_2_m = sgn_CUDA(del_a_2_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_3_L*del_a_3_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_3_L), fabs(del_a_3_R)); - lim_slope_b = fmin(fabs(del_a_3_C), fabs(del_a_3_G)); - del_a_3_m = sgn_CUDA(del_a_3_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - if (del_a_4_L*del_a_4_R > 0.0) { - lim_slope_a = fmin(fabs(del_a_4_L), fabs(del_a_4_R)); - lim_slope_b = fmin(fabs(del_a_4_C), fabs(del_a_4_G)); - del_a_4_m = sgn_CUDA(del_a_4_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - #ifdef DE - if (del_ge_L*del_ge_R > 0.0) { - lim_slope_a = fmin(fabs(del_ge_L), fabs(del_ge_R)); - lim_slope_b = fmin(fabs(del_ge_C), fabs(del_ge_G)); - del_ge_m_ipo = sgn_CUDA(del_ge_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_ge_m_ipo = 0.0; - #endif - #ifdef SCALAR - for (int i=0; i 0.0) { - lim_slope_a = fmin(fabs(del_scalar_L[i]), fabs(del_scalar_R[i])); - lim_slope_b = fmin(fabs(del_scalar_C[i]), 
fabs(del_scalar_G[i])); - del_scalar_m_ipo[i] = sgn_CUDA(del_scalar_C[i]) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); - } - else del_scalar_m_ipo[i] = 0.0; - } - #endif - - - // Step 5 - Project the monotonized difference in the characteristic variables back onto the - // primitive variables - // Stone Eqn 39 - - del_d_m_ipo = del_a_0_m + del_a_1_m + del_a_4_m; - del_vx_m_ipo = -a*del_a_0_m / d_ipo + a* del_a_4_m / d_ipo; - del_vy_m_ipo = del_a_2_m; - del_vz_m_ipo = del_a_3_m; - del_p_m_ipo = a*a*del_a_0_m + a*a*del_a_4_m; - - - // Step 6 - Use parabolic interpolation to compute values at the left and right of each cell center - // Here, the subscripts L and R refer to the left and right side of the ith cell center - // Stone Eqn 46 - - d_L = 0.5*(d_i + d_imo) - (del_d_m_i - del_d_m_imo) / 6.0; - vx_L = 0.5*(vx_i + vx_imo) - (del_vx_m_i - del_vx_m_imo) / 6.0; - vy_L = 0.5*(vy_i + vy_imo) - (del_vy_m_i - del_vy_m_imo) / 6.0; - vz_L = 0.5*(vz_i + vz_imo) - (del_vz_m_i - del_vz_m_imo) / 6.0; - p_L = 0.5*(p_i + p_imo) - (del_p_m_i - del_p_m_imo) / 6.0; - - d_R = 0.5*(d_ipo + d_i) - (del_d_m_ipo - del_d_m_i) / 6.0; - vx_R = 0.5*(vx_ipo + vx_i) - (del_vx_m_ipo - del_vx_m_i) / 6.0; - vy_R = 0.5*(vy_ipo + vy_i) - (del_vy_m_ipo - del_vy_m_i) / 6.0; - vz_R = 0.5*(vz_ipo + vz_i) - (del_vz_m_ipo - del_vz_m_i) / 6.0; - p_R = 0.5*(p_ipo + p_i) - (del_p_m_ipo - del_p_m_i) / 6.0; - - #ifdef DE - ge_L = 0.5*(ge_i + ge_imo) - (del_ge_m_i - del_ge_m_imo) / 6.0; - ge_R = 0.5*(ge_ipo + ge_i) - (del_ge_m_ipo - del_ge_m_i) / 6.0; - #endif - #ifdef SCALAR - for (int i=0; i (d_R - d_L) *(d_R - d_L)) d_L = 3.0*d_i - 2.0*d_R; - if ( 6.0*(vx_R - vx_L)*(vx_i - 0.5*(vx_L + vx_R)) > (vx_R - vx_L)*(vx_R - vx_L)) vx_L = 3.0*vx_i - 2.0*vx_R; - if ( 6.0*(vy_R - vy_L)*(vy_i - 0.5*(vy_L + vy_R)) > (vy_R - vy_L)*(vy_R - vy_L)) vy_L = 3.0*vy_i - 2.0*vy_R; - if ( 6.0*(vz_R - vz_L)*(vz_i - 0.5*(vz_L + vz_R)) > (vz_R - vz_L)*(vz_R - vz_L)) vz_L = 3.0*vz_i - 2.0*vz_R; - if ( 6.0*(p_R - p_L) *(p_i - 
0.5*(p_L + p_R)) > (p_R - p_L) *(p_R - p_L)) p_L = 3.0*p_i - 2.0*p_R; - - if ( 6.0*(d_R - d_L) *(d_i - 0.5*(d_L + d_R)) < -(d_R - d_L) *(d_R - d_L)) d_R = 3.0*d_i - 2.0*d_L; - if ( 6.0*(vx_R - vx_L)*(vx_i - 0.5*(vx_L + vx_R)) < -(vx_R - vx_L)*(vx_R - vx_L)) vx_R = 3.0*vx_i - 2.0*vx_L; - if ( 6.0*(vy_R - vy_L)*(vy_i - 0.5*(vy_L + vy_R)) < -(vy_R - vy_L)*(vy_R - vy_L)) vy_R = 3.0*vy_i - 2.0*vy_L; - if ( 6.0*(vz_R - vz_L)*(vz_i - 0.5*(vz_L + vz_R)) < -(vz_R - vz_L)*(vz_R - vz_L)) vz_R = 3.0*vz_i - 2.0*vz_L; - if ( 6.0*(p_R - p_L) *(p_i - 0.5*(p_L + p_R)) < -(p_R - p_L) *(p_R - p_L)) p_R = 3.0*p_i - 2.0*p_L; - - d_L = fmax( fmin(d_i, d_imo), d_L ); - d_L = fmin( fmax(d_i, d_imo), d_L ); - d_R = fmax( fmin(d_i, d_ipo), d_R ); - d_R = fmin( fmax(d_i, d_ipo), d_R ); - vx_L = fmax( fmin(vx_i, vx_imo), vx_L ); - vx_L = fmin( fmax(vx_i, vx_imo), vx_L ); - vx_R = fmax( fmin(vx_i, vx_ipo), vx_R ); - vx_R = fmin( fmax(vx_i, vx_ipo), vx_R ); - vy_L = fmax( fmin(vy_i, vy_imo), vy_L ); - vy_L = fmin( fmax(vy_i, vy_imo), vy_L ); - vy_R = fmax( fmin(vy_i, vy_ipo), vy_R ); - vy_R = fmin( fmax(vy_i, vy_ipo), vy_R ); - vz_L = fmax( fmin(vz_i, vz_imo), vz_L ); - vz_L = fmin( fmax(vz_i, vz_imo), vz_L ); - vz_R = fmax( fmin(vz_i, vz_ipo), vz_R ); - vz_R = fmin( fmax(vz_i, vz_ipo), vz_R ); - p_L = fmax( fmin(p_i, p_imo), p_L ); - p_L = fmin( fmax(p_i, p_imo), p_L ); - p_R = fmax( fmin(p_i, p_ipo), p_R ); - p_R = fmin( fmax(p_i, p_ipo), p_R ); - - #ifdef DE - if ((ge_R - ge_i) *(ge_i - ge_L) <= 0) ge_L = ge_R = ge_i; - if ( 6.0*(ge_R - ge_L) *(ge_i - 0.5*(ge_L + ge_R)) > (ge_R - ge_L) *(ge_R - ge_L)) ge_L = 3.0*ge_i - 2.0*ge_R; - if ( 6.0*(ge_R - ge_L) *(ge_i - 0.5*(ge_L + ge_R)) < -(ge_R - ge_L) *(ge_R - ge_L)) ge_R = 3.0*ge_i - 2.0*ge_L; - ge_L = fmax( fmin(ge_i, ge_imo), ge_L ); - ge_L = fmin( fmax(ge_i, ge_imo), ge_L ); - ge_R = fmax( fmin(ge_i, ge_ipo), ge_R ); - ge_R = fmin( fmax(ge_i, ge_ipo), ge_R ); - #endif - - #ifdef SCALAR - for (int i=0; i (scalar_R[i] - scalar_L[i]) 
*(scalar_R[i] - scalar_L[i])) scalar_L[i] = 3.0*scalar_i[i] - 2.0*scalar_R[i]; - if ( 6.0*(scalar_R[i] - scalar_L[i]) *(scalar_i[i] - 0.5*(scalar_L[i] + scalar_R[i])) < -(scalar_R[i] - scalar_L[i]) *(scalar_R[i] - scalar_L[i])) scalar_R[i] = 3.0*scalar_i[i] - 2.0*scalar_L[i]; - scalar_L[i] = fmax( fmin(scalar_i[i], scalar_imo[i]), scalar_L[i] ); - scalar_L[i] = fmin( fmax(scalar_i[i], scalar_imo[i]), scalar_L[i] ); - scalar_R[i] = fmax( fmin(scalar_i[i], scalar_ipo[i]), scalar_R[i] ); - scalar_R[i] = fmin( fmax(scalar_i[i], scalar_ipo[i]), scalar_R[i] ); - } - #endif - - // #ifdef CTU - #ifndef VL - - // Step 8 - Compute the coefficients for the monotonized parabolic interpolation function - // Stone Eqn 54 - - del_d_m_i = d_R - d_L; - del_vx_m_i = vx_R - vx_L; - del_vy_m_i = vy_R - vy_L; - del_vz_m_i = vz_R - vz_L; - del_p_m_i = p_R - p_L; - - d_6 = 6.0*(d_i - 0.5*(d_L + d_R)); - vx_6 = 6.0*(vx_i - 0.5*(vx_L + vx_R)); - vy_6 = 6.0*(vy_i - 0.5*(vy_L + vy_R)); - vz_6 = 6.0*(vz_i - 0.5*(vz_L + vz_R)); - p_6 = 6.0*(p_i - 0.5*(p_L + p_R)); - - #ifdef DE - del_ge_m_i = ge_R - ge_L; - ge_6 = 6.0*(ge_i - 0.5*(ge_L + ge_R)); - #endif - - #ifdef SCALAR - for (int i=0; i= 0) - { - A = (0.5*dtodx) * (lambda_p - lambda_m); - B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_m*lambda_m); - - chi_1 = A*(del_d_m_i - d_6) + B*d_6; - chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6; - chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6; - chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6; - chi_5 = A*(del_p_m_i - p_6) + B*p_6; - - sum_1 += -0.5*(d_i*chi_2/a - chi_5/(a*a)); - sum_2 += 0.5*(chi_2 - chi_5/(a*d_i)); - sum_5 += -0.5*(d_i*chi_2*a - chi_5); - } - if (lambda_0 >= 0) - { - A = (0.5*dtodx) * (lambda_p - lambda_0); - B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_0*lambda_0); - - chi_1 = A*(del_d_m_i - d_6) + B*d_6; - chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6; - chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6; - chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6; - chi_5 = A*(del_p_m_i - p_6) + B*p_6; - #ifdef DE 
- chi_ge = A*(del_ge_m_i - ge_6) + B*ge_6; - #endif - #ifdef SCALAR - for (int i=0; i= 0) - { - A = (0.5*dtodx) * (lambda_p - lambda_p); - B = (1.0/3.0)*(dtodx)*(dtodx)*(lambda_p*lambda_p - lambda_p*lambda_p); - - chi_1 = A*(del_d_m_i - d_6) + B*d_6; - chi_2 = A*(del_vx_m_i - vx_6) + B*vx_6; - chi_3 = A*(del_vy_m_i - vy_6) + B*vy_6; - chi_4 = A*(del_vz_m_i - vz_6) + B*vz_6; - chi_5 = A*(del_p_m_i - p_6) + B*p_6; - - sum_1 += 0.5*(d_i*chi_2/a + chi_5/(a*a)); - sum_2 += 0.5*(chi_2 + chi_5/(a*d_i)); - sum_5 += 0.5*(d_i*chi_2*a + chi_5); - } + del_m_i.density = interface_L_iph.density - interface_R_imh.density; + del_m_i.velocity_x = interface_L_iph.velocity_x - interface_R_imh.velocity_x; + del_m_i.velocity_y = interface_L_iph.velocity_y - interface_R_imh.velocity_y; + del_m_i.velocity_z = interface_L_iph.velocity_z - interface_R_imh.velocity_z; + del_m_i.pressure = interface_L_iph.pressure - interface_R_imh.pressure; + + Real const d_6 = 6.0 * (cell_i.density - 0.5 * (interface_R_imh.density + interface_L_iph.density)); + Real const vx_6 = 6.0 * (cell_i.velocity_x - 0.5 * (interface_R_imh.velocity_x + interface_L_iph.velocity_x)); + Real const vy_6 = 6.0 * (cell_i.velocity_y - 0.5 * (interface_R_imh.velocity_y + interface_L_iph.velocity_y)); + Real const vz_6 = 6.0 * (cell_i.velocity_z - 0.5 * (interface_R_imh.velocity_z + interface_L_iph.velocity_z)); + Real const p_6 = 6.0 * (cell_i.pressure - 0.5 * (interface_R_imh.pressure + interface_L_iph.pressure)); + +#ifdef DE + del_m_i.gas_energy = interface_L_iph.gas_energy - interface_R_imh.gas_energy; + Real const ge_6 = 6.0 * (cell_i.gas_energy - 0.5 * (interface_R_imh.gas_energy + interface_L_iph.gas_energy)); +#endif // DE + +#ifdef SCALAR + Real scalar_6[NSCALARS]; + for (int i = 0; i < NSCALARS; i++) { + del_m_i.scalar[i] = interface_L_iph.scalar[i] - interface_R_imh.scalar[i]; + scalar_6[i] = 6.0 * (cell_i.scalar[i] - 0.5 * (interface_R_imh.scalar[i] + interface_L_iph.scalar[i])); + } +#endif // SCALAR + + // 
Compute the eigenvalues of the linearized equations in the + // primitive variables using the cell-centered primitive variables + + // recalculate the adiabatic sound speed in cell i + sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + + Real const lambda_m = cell_i.velocity_x - sound_speed; + Real const lambda_0 = cell_i.velocity_x; + Real const lambda_p = cell_i.velocity_x + sound_speed; + + // Step 9 - Compute the left and right interface values using monotonized + // parabolic interpolation + // Stone Eqns 55 & 56 + + // largest eigenvalue + Real const lambda_max = fmax(lambda_p, (Real)0); + // smallest eigenvalue + Real const lambda_min = fmin(lambda_m, (Real)0); + + // left interface value, i+1/2 + Real const dtodx = dt / dx; + interface_L_iph.density = + interface_L_iph.density - + lambda_max * (0.5 * dtodx) * (del_m_i.density - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * d_6); + interface_L_iph.velocity_x = + interface_L_iph.velocity_x - + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_x - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vx_6); + interface_L_iph.velocity_y = + interface_L_iph.velocity_y - + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_y - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vy_6); + interface_L_iph.velocity_z = + interface_L_iph.velocity_z - + lambda_max * (0.5 * dtodx) * (del_m_i.velocity_z - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * vz_6); + interface_L_iph.pressure = + interface_L_iph.pressure - + lambda_max * (0.5 * dtodx) * (del_m_i.pressure - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * p_6); + + // right interface value, i-1/2 + interface_R_imh.density = + interface_R_imh.density - + lambda_min * (0.5 * dtodx) * (del_m_i.density + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * d_6); + interface_R_imh.velocity_x = + interface_R_imh.velocity_x - + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_x + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vx_6); + interface_R_imh.velocity_y = + 
interface_R_imh.velocity_y - + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_y + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vy_6); + interface_R_imh.velocity_z = + interface_R_imh.velocity_z - + lambda_min * (0.5 * dtodx) * (del_m_i.velocity_z + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * vz_6); + interface_R_imh.pressure = + interface_R_imh.pressure - + lambda_min * (0.5 * dtodx) * (del_m_i.pressure + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * p_6); + +#ifdef DE + interface_L_iph.gas_energy = + interface_L_iph.gas_energy - + lambda_max * (0.5 * dtodx) * (del_m_i.gas_energy - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * ge_6); + interface_R_imh.gas_energy = + interface_R_imh.gas_energy - + lambda_min * (0.5 * dtodx) * (del_m_i.gas_energy + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * ge_6); +#endif // DE + +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] = + interface_L_iph.scalar[i] - + lambda_max * (0.5 * dtodx) * (del_m_i.scalar[i] - (1.0 - (2.0 / 3.0) * lambda_max * dtodx) * scalar_6[i]); + interface_R_imh.scalar[i] = + interface_R_imh.scalar[i] - + lambda_min * (0.5 * dtodx) * (del_m_i.scalar[i] + (1.0 + (2.0 / 3.0) * lambda_min * dtodx) * scalar_6[i]); + } +#endif // SCALAR + + // Step 10 - Perform the characteristic tracing + // Stone Eqns 57 - 60 + + // left-hand interface value, i+1/2 + Real sum_1 = 0, sum_2 = 0, sum_3 = 0, sum_4 = 0, sum_5 = 0; +#ifdef DE + Real sum_ge = 0; + Real chi_ge = 0; +#endif // DE +#ifdef SCALAR + Real chi_scalar[NSCALARS]; + Real sum_scalar[NSCALARS]; + for (Real &val : sum_scalar) { + val = 0; + } +#endif // SCALAR - // add the corrections to the initial guesses for the interface values - d_R += sum_1; - vx_R += sum_2; - vy_R += sum_3; - vz_R += sum_4; - p_R += sum_5; - #ifdef DE - ge_R += sum_ge; - #endif - #ifdef SCALAR - for (int i=0; i= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_m); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_m * lambda_m); - // 
add the corrections - d_L += sum_1; - vx_L += sum_2; - vy_L += sum_3; - vz_L += sum_4; - p_L += sum_5; - #ifdef DE - ge_L += sum_ge; - #endif - #ifdef SCALAR - for (int i=0; i= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_0); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_0 * lambda_0); + + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; +#ifdef DE + chi_ge = A * (del_m_i.gas_energy - ge_6) + B * ge_6; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = A * (del_m_i.scalar[i] - scalar_6[i]) + B * scalar_6[i]; + } +#endif // SCALAR + + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); + sum_3 += chi_3; + sum_4 += chi_4; +#ifdef DE + sum_ge += chi_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; + } +#endif // SCALAR + } + if (lambda_p >= 0) { + Real const A = (0.5 * dtodx) * (lambda_p - lambda_p); + Real const B = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_p * lambda_p - lambda_p * lambda_p); + + Real const chi_1 = A * (del_m_i.density - d_6) + B * d_6; + Real const chi_2 = A * (del_m_i.velocity_x - vx_6) + B * vx_6; + Real const chi_3 = A * (del_m_i.velocity_y - vy_6) + B * vy_6; + Real const chi_4 = A * (del_m_i.velocity_z - vz_6) + B * vz_6; + Real const chi_5 = A * (del_m_i.pressure - p_6) + B * p_6; + + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); + } + + // add the corrections to the initial guesses for the interface values + interface_L_iph.density += sum_1; + 
interface_L_iph.velocity_x += sum_2; + interface_L_iph.velocity_y += sum_3; + interface_L_iph.velocity_z += sum_4; + interface_L_iph.pressure += sum_5; +#ifdef DE + interface_L_iph.gas_energy += sum_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_L_iph.scalar[i] += sum_scalar[i]; + } +#endif // SCALAR + + // right-hand interface value, i-1/2 + sum_1 = 0; + sum_2 = 0; + sum_3 = 0; + sum_4 = 0; + sum_5 = 0; +#ifdef DE + sum_ge = 0; +#endif // DE +#ifdef SCALAR + for (Real &val : sum_scalar) { + val = 0; + } +#endif // SCALAR + if (lambda_m <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_m); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_m * lambda_m); + + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; + + sum_1 += -0.5 * (cell_i.density * chi_2 / sound_speed - chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 - chi_5 / (sound_speed * cell_i.density)); + sum_5 += -0.5 * (cell_i.density * chi_2 * sound_speed - chi_5); + } + if (lambda_0 <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_0); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_0 * lambda_0); + + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; +#ifdef DE + chi_ge = C * (del_m_i.gas_energy + ge_6) + D * ge_6; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + chi_scalar[i] = C * (del_m_i.scalar[i] + scalar_6[i]) + D * scalar_6[i]; + } 
+#endif // SCALAR + + sum_1 += chi_1 - chi_5 / (sound_speed * sound_speed); + sum_3 += chi_3; + sum_4 += chi_4; +#ifdef DE + sum_ge += chi_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + sum_scalar[i] += chi_scalar[i]; + } +#endif // SCALAR + } + if (lambda_p <= 0) { + Real const C = (0.5 * dtodx) * (lambda_m - lambda_p); + Real const D = (1.0 / 3.0) * (dtodx) * (dtodx) * (lambda_m * lambda_m - lambda_p * lambda_p); + + Real const chi_1 = C * (del_m_i.density + d_6) + D * d_6; + Real const chi_2 = C * (del_m_i.velocity_x + vx_6) + D * vx_6; + Real const chi_3 = C * (del_m_i.velocity_y + vy_6) + D * vy_6; + Real const chi_4 = C * (del_m_i.velocity_z + vz_6) + D * vz_6; + Real const chi_5 = C * (del_m_i.pressure + p_6) + D * p_6; + + sum_1 += 0.5 * (cell_i.density * chi_2 / sound_speed + chi_5 / (sound_speed * sound_speed)); + sum_2 += 0.5 * (chi_2 + chi_5 / (sound_speed * cell_i.density)); + sum_5 += 0.5 * (cell_i.density * chi_2 * sound_speed + chi_5); + } + // add the corrections + interface_R_imh.density += sum_1; + interface_R_imh.velocity_x += sum_2; + interface_R_imh.velocity_y += sum_3; + interface_R_imh.velocity_z += sum_4; + interface_R_imh.pressure += sum_5; +#ifdef DE + interface_R_imh.gas_energy += sum_ge; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + interface_R_imh.scalar[i] += sum_scalar[i]; } +#endif // SCALAR + + // This is the end of the characteristic tracing + + // enforce minimum values + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Step 11 - Send final values back from kernel + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the 
right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); } +// ===================================================================================================================== +// ===================================================================================================================== +__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real gamma, int dir) +{ + // get a thread ID + int const thread_id = threadIdx.x + blockIdx.x * blockDim.x; + int xid, yid, zid; + cuda_utilities::compute3DIndices(thread_id, nx, ny, xid, yid, zid); -#endif //PPMC -#endif //CUDA + // Ensure that we are only operating on cells that will be used + if (reconstruction::Thread_Guard<3>(nx, ny, nz, xid, yid, zid)) { + return; + } + + // Compute the total number of cells + int const n_cells = nx * ny * nz; + + // Set the field indices for the various directions + int o1, o2, o3; + switch (dir) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; + } + // load the 5-cell stencil into registers + // cell i + reconstruction::Primitive const cell_i = + reconstruction::Load_Data(dev_conserved, xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-1. 
The equality checks the direction and will subtract one from the correct direction + // im1 stands for "i minus 1" + reconstruction::Primitive const cell_im1 = reconstruction::Load_Data( + dev_conserved, xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+1. The equality checks the direction and add one to the correct direction + // ip1 stands for "i plus 1" + reconstruction::Primitive const cell_ip1 = reconstruction::Load_Data( + dev_conserved, xid + int(dir == 0), yid + int(dir == 1), zid + int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i-2. The equality checks the direction and will subtract two from the correct direction + // im2 stands for "i minus 2" + reconstruction::Primitive const cell_im2 = + reconstruction::Load_Data(dev_conserved, xid - 2 * int(dir == 0), yid - 2 * int(dir == 1), + zid - 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // cell i+2. The equality checks the direction and add two to the correct direction + // ip2 stands for "i plus 2" + reconstruction::Primitive const cell_ip2 = + reconstruction::Load_Data(dev_conserved, xid + 2 * int(dir == 0), yid + 2 * int(dir == 1), + zid + 2 * int(dir == 2), nx, ny, n_cells, o1, o2, o3, gamma); + + // Convert to the characteristic variables + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(cell_i.pressure, cell_i.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + +#ifdef MHD + reconstruction::EigenVecs eigenvectors = + reconstruction::Compute_Eigenvectors(cell_i, sound_speed, sound_speed_squared, gamma); +#else + reconstruction::EigenVecs eigenvectors; +#endif // MHD + + // Cell i + reconstruction::Characteristic const cell_i_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_i, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i-1 + reconstruction::Characteristic const cell_im1_characteristic = 
reconstruction::Primitive_To_Characteristic( + cell_i, cell_im1, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i-2 + reconstruction::Characteristic const cell_im2_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_im2, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i+1 + reconstruction::Characteristic const cell_ip1_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_ip1, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Cell i+2 + reconstruction::Characteristic const cell_ip2_characteristic = reconstruction::Primitive_To_Characteristic( + cell_i, cell_ip2, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Compute the interface states for each field + reconstruction::Characteristic interface_R_imh_characteristic, interface_L_iph_characteristic; + + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a0, cell_im1_characteristic.a0, cell_i_characteristic.a0, + cell_ip1_characteristic.a0, cell_ip2_characteristic.a0, + interface_L_iph_characteristic.a0, interface_R_imh_characteristic.a0); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a1, cell_im1_characteristic.a1, cell_i_characteristic.a1, + cell_ip1_characteristic.a1, cell_ip2_characteristic.a1, + interface_L_iph_characteristic.a1, interface_R_imh_characteristic.a1); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a2, cell_im1_characteristic.a2, cell_i_characteristic.a2, + cell_ip1_characteristic.a2, cell_ip2_characteristic.a2, + interface_L_iph_characteristic.a2, interface_R_imh_characteristic.a2); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a3, cell_im1_characteristic.a3, cell_i_characteristic.a3, + cell_ip1_characteristic.a3, cell_ip2_characteristic.a3, + interface_L_iph_characteristic.a3, interface_R_imh_characteristic.a3); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a4, cell_im1_characteristic.a4, 
cell_i_characteristic.a4, + cell_ip1_characteristic.a4, cell_ip2_characteristic.a4, + interface_L_iph_characteristic.a4, interface_R_imh_characteristic.a4); + +#ifdef MHD + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a5, cell_im1_characteristic.a5, cell_i_characteristic.a5, + cell_ip1_characteristic.a5, cell_ip2_characteristic.a5, + interface_L_iph_characteristic.a5, interface_R_imh_characteristic.a5); + reconstruction::PPM_Single_Variable(cell_im2_characteristic.a6, cell_im1_characteristic.a6, cell_i_characteristic.a6, + cell_ip1_characteristic.a6, cell_ip2_characteristic.a6, + interface_L_iph_characteristic.a6, interface_R_imh_characteristic.a6); +#endif // MHD + + // Convert back to primitive variables + reconstruction::Primitive interface_L_iph = reconstruction::Characteristic_To_Primitive( + cell_i, interface_L_iph_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + reconstruction::Primitive interface_R_imh = reconstruction::Characteristic_To_Primitive( + cell_i, interface_R_imh_characteristic, eigenvectors, sound_speed, sound_speed_squared, gamma); + + // Compute the interfaces for the variables that don't have characteristics +#ifdef DE + reconstruction::PPM_Single_Variable(cell_im2.gas_energy, cell_im1.gas_energy, cell_i.gas_energy, cell_ip1.gas_energy, + cell_ip2.gas_energy, interface_L_iph.gas_energy, interface_R_imh.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + reconstruction::PPM_Single_Variable(cell_im2.scalar[i], cell_im1.scalar[i], cell_i.scalar[i], cell_ip1.scalar[i], + cell_ip2.scalar[i], interface_L_iph.scalar[i], interface_R_imh.scalar[i]); + } +#endif // SCALAR + + // enforce minimum values + interface_R_imh.density = fmax(interface_R_imh.density, (Real)TINY_NUMBER); + interface_L_iph.density = fmax(interface_L_iph.density, (Real)TINY_NUMBER); + interface_R_imh.pressure = fmax(interface_R_imh.pressure, (Real)TINY_NUMBER); + interface_L_iph.pressure = 
fmax(interface_L_iph.pressure, (Real)TINY_NUMBER); + + // Step 11 - Send final values back from kernel + + // Convert the left and right states in the primitive to the conserved variables send final values back from kernel + // bounds_R refers to the right side of the i-1/2 interface + size_t id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + reconstruction::Write_Data(interface_L_iph, dev_bounds_L, dev_conserved, id, n_cells, o1, o2, o3, gamma); + + id = cuda_utilities::compute1DIndex(xid - int(dir == 0), yid - int(dir == 1), zid - int(dir == 2), nx, ny); + reconstruction::Write_Data(interface_R_imh, dev_bounds_R, dev_conserved, id, n_cells, o1, o2, o3, gamma); +} +// ===================================================================================================================== diff --git a/src/reconstruction/ppmc_cuda.h b/src/reconstruction/ppmc_cuda.h index 6c7cfd9fc..916853874 100644 --- a/src/reconstruction/ppmc_cuda.h +++ b/src/reconstruction/ppmc_cuda.h @@ -1,18 +1,53 @@ /*! \file ppmc_cuda.h - * \brief Declarations of the cuda ppm kernels, characteristic reconstruction version. */ -#ifdef CUDA -#ifdef PPMC + * \brief Declarations of the cuda ppm kernels, characteristic reconstruction + * version. */ #ifndef PPMC_CUDA_H #define PPMC_CUDA_H #include "../global/global.h" -/*! \fn void PPMC(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm. */ -__global__ void PPMC_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); +/*! + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables and + * characteristic tracing. Used for the CTU and SIMPLE integrators. 
This uses the PPM method described in + * Stone et al. 2008 "Athena: A New Code for Astrophysical MHD". Fundamentally this method relies on a Van Leer limiter + * in the characteristic variables to monotonize the slopes followed by limiting the interface states using the limiter + * from Colella & Woodward 1984. + * + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] dx The length of the cells in the `dir` direction + * \param[in] dt The time step + * \param[in] gamma The adiabatic index + * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z + */ +__global__ void PPMC_CTU(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, Real dx, + Real dt, Real gamma, int dir); -#endif // PPMC_CUDA_H -#endif // PPMC -#endif // CUDA +/*! + * \brief Computes the left and right interface states using PPM with limiting in the characteristic variables. Used for + * the VL (Van Leer) integrators. This uses the PPM method described in + * Felker & Stone 2018 "A fourth-order accurate finite volume method for ideal MHD via upwind constrained transport". + * This method computes the 3rd order interface then applies a mixture of monotonicity constraints from Colella & + * Sekora 2008, McCorquodale & Colella 2011, and Colella et al. 2011; for details see the + * `reconstruction::PPM_Single_Variable` function. We found that this newer method and limiters was more stable, less + * oscillatory, and faster than the method described in Stone et al. 2008 which is used in PPMC_CTU. The difference is + * most pronounced in the Brio & Wu shock tube where the PPM oscillations are much smaller using this method. 
+ * + * \param[in] dev_conserved The conserved variable array + * \param[out] dev_bounds_L The array of left interfaces + * \param[out] dev_bounds_R The array of right interfaces + * \param[in] nx The number of cells in the X-direction + * \param[in] ny The number of cells in the Y-direction + * \param[in] nz The number of cells in the Z-direction + * \param[in] gamma The adiabatic index + * \param[in] dir The direction to reconstruct. 0=X, 1=Y, 2=Z + */ +__global__ __launch_bounds__(TPB) void PPMC_VL(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, + int ny, int nz, Real gamma, int dir); + +#endif // PPMC_CUDA_H diff --git a/src/reconstruction/ppmc_cuda_tests.cu b/src/reconstruction/ppmc_cuda_tests.cu new file mode 100644 index 000000000..9e9b11140 --- /dev/null +++ b/src/reconstruction/ppmc_cuda_tests.cu @@ -0,0 +1,259 @@ +/*! + * \file ppmc_cuda_tests.cu + * \brief Tests for the contents of ppmc_cuda.h and ppmc_cuda.cu + * + */ + +// STL Includes +#include +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../io/io.h" +#include "../reconstruction/ppmc_cuda.h" +#include "../utils/DeviceVector.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" + +TEST(tHYDROPpmcCTUReconstructor, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; + size_t const n_fields = 5; + double const dx = doubleRand(prng); + double const dt = doubleRand(prng); + double const gamma = 5.0 / 3.0; + + // Setup host grid. 
Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (double &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + // Fiducial Data + std::vector> fiducial_interface_left = {{{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.2002498722761787}, + {734, 1.764334292986655}, + {950, 3.3600925565746804}}, + {{86, 2.4950488327292639}, + {302, 0.79287723513518138}, + {518, 1.7614576990062414}, + {734, 1.8238574169157304}, + {950, 3.14294317122161}}, + {{86, 2.6558981128823214}, + {302, 0.84399195916314151}, + {518, 2.0109603398129137}, + {734, 1.764334292986655}, + {950, 3.2100231679403066}}}; + + std::vector> fiducial_interface_right = {{{85, 2.6558981128823214}, + {301, 0.84399195916314151}, + {517, 1.8381070277226794}, + {733, 1.764334292986655}, + {949, 3.0847691079841209}}, + {{80, 3.1281603739188069}, + {296, 0.99406757727427164}, + {512, 1.8732124042412865}, + {728, 1.6489758692176784}, + {944, 2.8820015278590443}}, + {{50, 2.6558981128823214}, + {266, 0.84399195916314151}, + {482, 2.0109603398129137}, + {698, 1.764334292986655}, + {914, 3.2100231679403066}}}; + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(host_grid.size(), true); + cuda_utilities::DeviceVector dev_interface_right(host_grid.size(), true); + + // Launch kernel + hipLaunchKernelGGL(PPMC_CTU, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, dx, dt, gamma, direction); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < host_grid.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double 
fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 0.0 + : fiducial_interface_right.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} + +TEST(tALLPpmcVLReconstructor, CorrectInputExpectCorrectOutput) +{ +#ifdef DE + /// This test doesn't support Dual Energy. It wouldn't be that hard to add support for DE but the DE parts of the + /// reconstructor (loading and PPM_Single_Variable) are well tested elsewhere so there's no need to add the extra + /// complexity here. + GTEST_SKIP(); +#endif // DE + + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(0.1, 5); + + // Mock up needed information + size_t const nx = 6; + size_t const ny = 6; + size_t const nz = 6; + double const gamma = 5.0 / 3.0; +#ifdef MHD + size_t const n_fields = 8; +#else // not MHD + size_t const n_fields = 5; +#endif // MHD + + // Setup host grid. 
Fill host grid with random values and randomly assign maximum value + std::vector host_grid(nx * ny * nz * n_fields); + for (double &val : host_grid) { + val = doubleRand(prng); + } + + // Allocating and copying to device + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + +// Fiducial Data +#ifdef MHD + std::vector> fiducial_interface_left = {{{86, 3.6926886385390683}, + {302, 2.3022467009220993}, + {518, 2.3207781368125389}, + {734, 2.6544338753333747}, + {950, 11.430630157120799}, + {1166, 0.6428577630032507}, + {1382, 4.1406925096276597}}, + {{86, 3.811691682348938}, + {302, 1.4827993897794758}, + {518, 2.3955690789476871}, + {734, 4.06241130448349}, + {950, 10.552876853630949}, + {1166, 3.5147238706385471}, + {1382, 1.2344879085821312}}, + {{86, 3.1608655959160155}, + {302, 1.5377824007725194}, + {518, 0.41798730655927896}, + {734, 2.2721408530383784}, + {950, 5.6329522765789646}, + {1166, 0.84450832590555991}, + {1382, 1.4279317910797107}}}; + + std::vector> fiducial_interface_right = {{{85, 2.8949509658187838}, + {301, 0.25766140043685887}, + {517, 1.8194165731976308}, + {733, 2.0809921071868756}, + {949, 8.1315538869542046}, + {1165, 0.49708185787322312}, + {1381, 3.2017395511439881}}, + {{80, 2.8600082827930269}, + {296, 0.37343415089084014}, + {512, 1.7974558224423689}, + {728, 0.94369445956099784}, + {944, 7.7011501503138504}, + {1160, 3.5147238706385471}, + {1376, 1.2344879085821312}}, + {{50, 3.1608655959160155}, + {266, 0.32035830490636008}, + {482, 3.1721881746709815}, + {698, 2.2721408530383784}, + {914, 14.017699282483312}, + {1130, 1.5292690020097823}, + {1346, -0.12121484974901264}}}; +#else // not MHD + std::vector> fiducial_interface_left = { + {{86, 4.155160222900312}, {302, 1.1624633361407897}, {518, 1.6379195998743412}, {734, 2.9868746414179093}}, + {{86, 4.1795874335665655}, {302, 2.1094239978455054}, {518, 2.6811988240843849}, {734, 4.2540957888954054}}, + {{86, 2.1772852940944429}, {302, 
0.58167501916840214}, {518, 1.3683785996473696}, {734, 0.40276763592716164}}}; + + std::vector> fiducial_interface_right = {{{54, 3.8655260187947502}, + {85, 2.6637168309565289}, + {301, 0.69483650107094164}, + {517, 2.7558388224532218}, + {733, 1.9147729154830744}}, + {{54, 5.7556871317935459}, + {80, 2.6515032256234021}, + {296, 0.39344537106429511}, + {512, 1.6491544916805785}, + {728, 0.85830485311660487}}, + {{50, 2.8254070932730269}, + {54, 2.1884721760267873}, + {266, 0.75482470285166003}, + {482, 1.7757096932649317}, + {698, 3.6101832818706452}}}; +#endif // MHD + + // Loop over different directions + for (size_t direction = 0; direction < 3; direction++) { + // Allocate device buffers + cuda_utilities::DeviceVector dev_interface_left(nx * ny * nz * (n_fields - 1), true); + cuda_utilities::DeviceVector dev_interface_right(nx * ny * nz * (n_fields - 1), true); + + // Launch kernel + hipLaunchKernelGGL(PPMC_VL, dev_grid.size(), 1, 0, 0, dev_grid.data(), dev_interface_left.data(), + dev_interface_right.data(), nx, ny, nz, gamma, direction); + GPU_Error_Check(); + GPU_Error_Check(cudaDeviceSynchronize()); + + // Perform Comparison + for (size_t i = 0; i < dev_interface_left.size(); i++) { + // Check the left interface + double test_val = dev_interface_left.at(i); + double fiducial_val = + (fiducial_interface_left.at(direction).find(i) == fiducial_interface_left.at(direction).end()) + ? 0.0 + : fiducial_interface_left.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "left interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + + // Check the right interface + test_val = dev_interface_right.at(i); + fiducial_val = (fiducial_interface_right.at(direction).find(i) == fiducial_interface_right.at(direction).end()) + ? 
0.0 + : fiducial_interface_right.at(direction)[i]; + + testing_utilities::Check_Results( + fiducial_val, test_val, + "right interface at i=" + std::to_string(i) + ", in direction " + std::to_string(direction)); + } + } +} diff --git a/src/reconstruction/ppmp_cuda.cu b/src/reconstruction/ppmp_cuda.cu index ccd1f5a87..2038f215a 100644 --- a/src/reconstruction/ppmp_cuda.cu +++ b/src/reconstruction/ppmp_cuda.cu @@ -1,38 +1,49 @@ /*! \file ppmp_cuda.cu - * \brief Definitions of the piecewise parabolic reconstruction (Fryxell 2000) functions - with limiting in the primitive variables. */ -#ifdef CUDA + * \brief Definitions of the piecewise parabolic reconstruction (Fryxell 2000) + functions with limiting in the primitive variables. */ + #ifdef PPMP -#include "../utils/gpu.hpp" -#include -#include "../global/global.h" -#include "../global/global_cuda.h" -#include "../reconstruction/ppmp_cuda.h" + #include + + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../reconstruction/ppmp_cuda.h" + #include "../utils/gpu.hpp" -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" -#endif + #ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" + #endif // #define STEEPENING // #define FLATTENING -//Note: Errors when using FLATTENING, need to check the ghost cells - -/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm with limiting in the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) +// Note: Errors when using FLATTENING, need to check the ghost cells + +/*! 
\fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using ppm with limiting in + the primitive variables. */ +__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) { - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } // declare primitive variables in the stencil @@ -43,7 +54,7 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real d_ipt, vx_ipt, vy_ipt, vz_ipt, p_ipt; #ifdef FLATTENING Real p_imth, p_ipth; - #endif + #endif // FLATTENING // declare left and right interface values Real d_L, vx_L, vy_L, vz_L, p_L; @@ -53,13 +64,13 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou Real del_q_imo, del_q_i, del_q_ipo; #ifndef VL -// #ifdef CTU - Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries - Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i + // #ifdef CTU + Real cs, cl, cr; // sound speed in cell i, and at left and right boundaries + Real del_d, del_vx, del_vy, del_vz, del_p; // "slope" accross cell i Real d_6, vx_6, vy_6, vz_6, p_6; Real beta_m, beta_0, beta_p; Real alpha_m, alpha_0, alpha_p; - Real lambda_m, lambda_0, lambda_p; // speed of characteristics + Real lambda_m, lambda_0, lambda_p; // speed of characteristics Real dL_m, vxL_m, pL_m; Real dL_0, vyL_0, vzL_0, pL_0; Real vxL_p, pL_p; @@ -68,34 +79,32 @@ __global__ void PPMP_cuda(Real 
*dev_conserved, Real *dev_bounds_L, Real *dev_bou Real dR_p, vxR_p, pR_p; Real chi_L_m, chi_L_0, chi_L_p; Real chi_R_m, chi_R_0, chi_R_p; - #endif + #endif // CTU #ifdef DE Real ge_i, ge_imo, ge_ipo, ge_imt, ge_ipt, ge_L, ge_R, E_kin, E, dge; - #ifndef VL -// #ifdef CTU + #ifndef VL + // #ifdef CTU Real del_ge, ge_6, geL_0, geR_0; - #endif - #endif + #endif // CTU + #endif // DE #ifdef SCALAR Real scalar_i[NSCALARS], scalar_imo[NSCALARS], scalar_ipo[NSCALARS], scalar_imt[NSCALARS], scalar_ipt[NSCALARS]; Real scalar_L[NSCALARS], scalar_R[NSCALARS]; - #ifndef VL -// #ifdef CTU + #ifndef VL + // #ifdef CTU Real del_scalar[NSCALARS], scalar_6[NSCALARS], scalarL_0[NSCALARS], scalarR_0[NSCALARS]; - #endif - #endif - - + #endif // CTU + #endif // SCALAR // get a thread ID - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId*blockDim.x; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; int id; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; int xs, xe, ys, ye, zs, ze; @@ -116,403 +125,442 @@ __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bou // zs = 3; ze = nz-4; // } - //Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost cells on each side ) + // Ignore only the 2 ghost cells on each side ( instead of ignoring 3 ghost + // cells on each side ) if (dir == 0) { - xs = 2; xe = nx-3; - ys = 0; ye = ny; - zs = 0; ze = nz; + xs = 2; + xe = nx - 3; + ys = 0; + ye = ny; + zs = 0; + ze = nz; } if (dir == 1) { - xs = 0; xe = nx; - ys = 2; ye = ny-3; - zs = 0; ze = nz; + xs = 0; + xe = nx; + ys = 2; + ye = ny - 3; + zs = 0; + ze = nz; } if (dir == 2) { - xs = 0; xe = nx; - ys = 0; ye = ny; - zs = 2; ze = nz-3; + xs = 0; + xe = nx; + ys = 0; + ye = ny; + zs = 2; + ze = nz - 3; } - if (xid 
>= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) - { + if (xid >= xs && xid < xe && yid >= ys && yid < ye && zid >= zs && zid < ze) { // load the 5-cell stencil into registers // cell i - id = xid + yid*nx + zid*nx*ny; - d_i = dev_conserved[ id]; - vx_i = dev_conserved[o1*n_cells + id] / d_i; - vy_i = dev_conserved[o2*n_cells + id] / d_i; - vz_i = dev_conserved[o3*n_cells + id] / d_i; - #ifdef DE //PRESSURE_DE - E = dev_conserved[4*n_cells + id]; - E_kin = 0.5 * d_i * ( vx_i*vx_i + vy_i*vy_i + vz_i*vz_i ); - dge = dev_conserved[(n_fields-1)*n_cells + id]; - p_i = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - p_i = (dev_conserved[4*n_cells + id] - 0.5*d_i*(vx_i*vx_i + vy_i*vy_i + vz_i*vz_i)) * (gamma - 1.0); - #endif //PRESSURE_DE - p_i = fmax(p_i, (Real) TINY_NUMBER); - #ifdef DE + id = xid + yid * nx + zid * nx * ny; + d_i = dev_conserved[id]; + vx_i = dev_conserved[o1 * n_cells + id] / d_i; + vy_i = dev_conserved[o2 * n_cells + id] / d_i; + vz_i = dev_conserved[o3 * n_cells + id] / d_i; + #ifdef DE // PRESSURE_DE + E = dev_conserved[4 * n_cells + id]; + E_kin = 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i); + dge = dev_conserved[(n_fields - 1) * n_cells + id]; + p_i = hydro_utilities::Get_Pressure_From_DE(E, E - E_kin, dge, gamma); + #else + p_i = (dev_conserved[4 * n_cells + id] - 0.5 * d_i * (vx_i * vx_i + vy_i * vy_i + vz_i * vz_i)) * (gamma - 1.0); + #endif // PRESSURE_DE + p_i = fmax(p_i, (Real)TINY_NUMBER); + #ifdef DE ge_i = dge / d_i; - #endif - #ifdef SCALAR - for (int i=0; i 0.01) - { - //calculate the second derivative of the density in the imo and ipo cells + // check for contact discontinuities & steepen if necessary (see Fryxell + // Sec 3.1.2) if condition 4 (Fryxell Eqn 37) (Colella Eqn 1.16.5) is true, + // check further conditions, otherwise do nothing + if ((fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo)) > 0.01) { + // calculate the second derivative of the density in the imo and ipo cells 
d2_rho_imo = calc_d2_rho(d_imt, d_imo, d_i, dx); d2_rho_ipo = calc_d2_rho(d_i, d_ipo, d_ipt, dx); - //if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check further conditions, otherwise do nothing - if ((d2_rho_imo * d2_rho_ipo) < 0) - { - //calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) (Colella Eqn 3.2) - //if c5 is true, set value of eta for discontinuity steepening - if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) - { - //calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5) + // if condition 1 (Fryxell Eqn 38) (Colella Eqn 1.16.5) is true, check + // further conditions, otherwise do nothing + if ((d2_rho_imo * d2_rho_ipo) < 0) { + // calculate condition 5, pressure vs density jumps (Fryxell Eqn 39) + // (Colella Eqn 3.2) if c5 is true, set value of eta for discontinuity + // steepening + if ((fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo)) < 0.1 * gamma * (fabs(d_ipo - d_imo) / fmin(d_ipo, d_imo))) { + // calculate first eta value (Fryxell Eqn 36) (Colella Eqn 1.16.5) eta_i = calc_eta(d2_rho_imo, d2_rho_ipo, dx, d_imo, d_ipo); - //calculate steepening coefficient (Fryxell Eqn 40) (Colella Eqn 1.16) - eta_i = fmax(0, fmin(20*(eta_i-0.05), 1) ); + // calculate steepening coefficient (Fryxell Eqn 40) (Colella + // Eqn 1.16) + eta_i = fmax(0, fmin(20 * (eta_i - 0.05), 1)); - //calculate new left and right interface variables using monotonized slopes + // calculate new left and right interface variables using monotonized + // slopes del_q_imo = Calculate_Slope(d_imt, d_imo, d_i); del_q_ipo = Calculate_Slope(d_i, d_ipo, d_ipt); - //replace left and right interface values of density (Colella Eqn 1.14, 1.15) - d_L = d_L*(1-eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i; - d_R = d_R*(1-eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i; + // replace left and right interface values of density (Colella + // Eqn 1.14, 1.15) + d_L = d_L * (1 - eta_i) + (d_imo + 0.5 * del_q_imo) * eta_i; + 
d_R = d_R * (1 - eta_i) + (d_ipo - 0.5 * del_q_ipo) * eta_i; } } } -#endif + #endif // STEEPENING -#ifdef FLATTENING + #ifdef FLATTENING Real F_imo, F_i, F_ipo; - //flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3) - //calculate the shock steepness parameters (Fryxell Eqn 43) - //calculate the dimensionless flattening coefficients (Fryxell Eqn 45) - F_imo = fmax( 0, fmin(1, 10*(( (p_i - p_imt) / (p_ipo - p_imth)) - 0.75)) ); - F_i = fmax( 0, fmin(1, 10*(( (p_ipo - p_imo) / (p_ipt - p_imt)) - 0.75)) ); - F_ipo = fmax( 0, fmin(1, 10*(( (p_ipt - p_i) / (p_ipth - p_imo)) - 0.75)) ); - //ensure that we are encountering a shock (Fryxell Eqns 46 & 47) - if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1./3.) {F_imo = 0;} - if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1./3.) {F_i = 0;} - if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1./3.) {F_ipo = 0;} - if (vx_i - vx_imt > 0) {F_imo = 0;} - if (vx_ipo - vx_imo > 0) {F_i = 0;} - if (vx_ipt - vx_i > 0) {F_ipo = 0;} - //set the flattening coefficient (Fryxell Eqn 48) - if (p_ipo - p_imo < 0) {F_i = fmax(F_i, F_ipo);} - else {F_i = fmax(F_i, F_imo);} - //modify the interface values - d_L = F_i * d_i + (1 - F_i) * d_L; + // flatten shock fronts that are too narrow (see Fryxell Sec 3.1.3) + // calculate the shock steepness parameters (Fryxell Eqn 43) + // calculate the dimensionless flattening coefficients (Fryxell Eqn 45) + F_imo = fmax(0, fmin(1, 10 * (((p_i - p_imt) / (p_ipo - p_imth)) - 0.75))); + F_i = fmax(0, fmin(1, 10 * (((p_ipo - p_imo) / (p_ipt - p_imt)) - 0.75))); + F_ipo = fmax(0, fmin(1, 10 * (((p_ipt - p_i) / (p_ipth - p_imo)) - 0.75))); + // ensure that we are encountering a shock (Fryxell Eqns 46 & 47) + if (fabs(p_i - p_imt) / fmin(p_i, p_imt) < 1. / 3.) { + F_imo = 0; + } + if (fabs(p_ipo - p_imo) / fmin(p_ipo, p_imo) < 1. / 3.) { + F_i = 0; + } + if (fabs(p_ipt - p_i) / fmin(p_ipt, p_i) < 1. / 3.) 
{ + F_ipo = 0; + } + if (vx_i - vx_imt > 0) { + F_imo = 0; + } + if (vx_ipo - vx_imo > 0) { + F_i = 0; + } + if (vx_ipt - vx_i > 0) { + F_ipo = 0; + } + // set the flattening coefficient (Fryxell Eqn 48) + if (p_ipo - p_imo < 0) { + F_i = fmax(F_i, F_ipo); + } else { + F_i = fmax(F_i, F_imo); + } + // modify the interface values + d_L = F_i * d_i + (1 - F_i) * d_L; vx_L = F_i * vx_i + (1 - F_i) * vx_L; vy_L = F_i * vy_i + (1 - F_i) * vy_L; vz_L = F_i * vz_i + (1 - F_i) * vz_L; - p_L = F_i * p_i + (1 - F_i) * p_L; + p_L = F_i * p_i + (1 - F_i) * p_L; #ifdef DE ge_L = F_i * ge_i + (1 - F_i) * ge_L; - #endif + #endif // DE #ifdef SCALAR - for (int i=0; i= 0) { chi_L_m = 0; } - if (lambda_0 >= 0) { chi_L_0 = 0; } - if (lambda_p >= 0) { chi_L_p = 0; } - if (lambda_m <= 0) { chi_R_m = 0; } - if (lambda_0 <= 0) { chi_R_0 = 0; } - if (lambda_p <= 0) { chi_R_p = 0; } - - // use the chi values to correct the initial guesses and calculate final input states - p_L = p_L + (d_L*d_L*cl*cl) * (chi_L_p + chi_L_m); - vx_L = vx_L + d_L*cl * (chi_L_p - chi_L_m); - d_L = pow( ((1.0/d_L) - (chi_L_m + chi_L_0 + chi_L_p)) , -1); - p_R = p_L + (d_R*d_R*cr*cr) * (chi_R_p + chi_R_m); - vx_R = vx_R + d_R*cr * (chi_R_p - chi_R_m); - d_R = pow( ((1.0/d_R) - (chi_R_m + chi_R_0 + chi_R_p)) , -1); -#endif //CTU + chi_L_m = 1. / (2 * d_L * cl) * (vx_L - vxL_m - (p_L - pL_m) / (d_L * cl)); + chi_L_p = -1. / (2 * d_L * cl) * (vx_L - vxL_p + (p_L - pL_p) / (d_L * cl)); + chi_L_0 = (p_L - pL_0) / (d_L * d_L * cl * cl) + 1. / d_L - 1. / dL_0; + chi_R_m = 1. / (2 * d_R * cr) * (vx_R - vxR_m - (p_R - pR_m) / (d_R * cr)); + chi_R_p = -1. / (2 * d_R * cr) * (vx_R - vxR_p + (p_R - pR_p) / (d_R * cr)); + chi_R_0 = (p_R - pR_0) / (d_R * d_R * cr * cr) + 1. / d_R - 1. 
/ dR_0; + + // set chi to 0 if characteristic velocity has the wrong sign (Fryxell Eqn + // 64) + if (lambda_m >= 0) { + chi_L_m = 0; + } + if (lambda_0 >= 0) { + chi_L_0 = 0; + } + if (lambda_p >= 0) { + chi_L_p = 0; + } + if (lambda_m <= 0) { + chi_R_m = 0; + } + if (lambda_0 <= 0) { + chi_R_0 = 0; + } + if (lambda_p <= 0) { + chi_R_p = 0; + } + // use the chi values to correct the initial guesses and calculate final + // input states + p_L = p_L + (d_L * d_L * cl * cl) * (chi_L_p + chi_L_m); + vx_L = vx_L + d_L * cl * (chi_L_p - chi_L_m); + d_L = pow(((1.0 / d_L) - (chi_L_m + chi_L_0 + chi_L_p)), -1); + p_R = p_R + (d_R * d_R * cr * cr) * (chi_R_p + chi_R_m); // right state corrects its own guess, not p_L + vx_R = vx_R + d_R * cr * (chi_R_p - chi_R_m); + d_R = pow(((1.0 / d_R) - (chi_R_m + chi_R_0 + chi_R_p)), -1); + #endif // CTU // Apply mimimum constraints - d_L = fmax(d_L, (Real) TINY_NUMBER); - d_R = fmax(d_R, (Real) TINY_NUMBER); - p_L = fmax(p_L, (Real) TINY_NUMBER); - p_R = fmax(p_R, (Real) TINY_NUMBER); - - // Convert the left and right states in the primitive to the conserved variables - // send final values back from kernel - // bounds_R refers to the right side of the i-1/2 interface - if (dir == 0) id = xid-1 + yid*nx + zid*nx*ny; - if (dir == 1) id = xid + (yid-1)*nx + zid*nx*ny; - if (dir == 2) id = xid + yid*nx + (zid-1)*nx*ny; - dev_bounds_R[ id] = d_L; - dev_bounds_R[o1*n_cells + id] = d_L*vx_L; - dev_bounds_R[o2*n_cells + id] = d_L*vy_L; - dev_bounds_R[o3*n_cells + id] = d_L*vz_L; - dev_bounds_R[4*n_cells + id] = p_L/(gamma-1.0) + 0.5*d_L*(vx_L*vx_L + vy_L*vy_L + vz_L*vz_L); - #ifdef SCALAR - for (int i=0; i 0.0) { del_q_G = 2.0*del_q_L*del_q_R / (del_q_L+del_q_R); } - else { del_q_G = 0.0; } - + if (del_q_L * del_q_R > 0.0) { + del_q_G = 2.0 * del_q_L * del_q_R / (del_q_L + del_q_R); + } else { + del_q_G = 0.0; + } // Monotonize the differences lim_slope_a = fmin(fabs(del_q_L), fabs(del_q_R)); lim_slope_b = fmin(fabs(del_q_C), fabs(del_q_G)); // Minmod limiter - //del_q_m = 
sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); + // del_q_m = sgn_CUDA(del_q_C)*fmin(2.0*lim_slope_a, fabs(del_q_C)); // Van Leer limiter - del_q_m = sgn_CUDA(del_q_C) * fmin((Real) 2.0*lim_slope_a, lim_slope_b); + del_q_m = sgn_CUDA(del_q_C) * fmin((Real)2.0 * lim_slope_a, lim_slope_b); return del_q_m; - } - -/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using parabolic reconstruction - in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) +/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, + Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) + * \brief Calculates the left and right interface values for a cell using + parabolic reconstruction in the primitive variables with limited slopes + provided. 
Applies further monotonicity constraints.*/ +__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, + Real *q_L, Real *q_R) { // Calculate the left and right interface values using the limited slopes - *q_L = 0.5*(q_i + q_imo) - (1.0/6.0)*(del_q_i - del_q_imo); - *q_R = 0.5*(q_ipo + q_i) - (1.0/6.0)*(del_q_ipo - del_q_i); + *q_L = 0.5 * (q_i + q_imo) - (1.0 / 6.0) * (del_q_i - del_q_imo); + *q_R = 0.5 * (q_ipo + q_i) - (1.0 / 6.0) * (del_q_ipo - del_q_i); - // Apply further monotonicity constraints to ensure interface values lie between - // neighboring cell-centered values + // Apply further monotonicity constraints to ensure interface values lie + // between neighboring cell-centered values // local maximum or minimum criterion (Fryxell Eqn 52, Fig 11) - if ((*q_R - q_i)*(q_i - *q_L) <= 0) *q_L = *q_R = q_i; + if ((*q_R - q_i) * (q_i - *q_L) <= 0) *q_L = *q_R = q_i; // steep gradient criterion (Fryxell Eqn 53, Fig 12) - if (6.0*(*q_R - *q_L)*(q_i - 0.5*(*q_L + *q_R)) > (*q_R - *q_L)*(*q_R - *q_L)) *q_L = 3.0*q_i - 2.0*(*q_R); - if (6.0*(*q_R - *q_L)*(q_i - 0.5*(*q_L + *q_R)) < -(*q_R - *q_L)*(*q_R - *q_L)) *q_R = 3.0*q_i - 2.0*(*q_L); - - *q_L = fmax( fmin(q_i, q_imo), *q_L ); - *q_L = fmin( fmax(q_i, q_imo), *q_L ); - *q_R = fmax( fmin(q_i, q_ipo), *q_R ); - *q_R = fmin( fmax(q_i, q_ipo), *q_R ); + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) > (*q_R - *q_L) * (*q_R - *q_L)) { + *q_L = 3.0 * q_i - 2.0 * (*q_R); + } + if (6.0 * (*q_R - *q_L) * (q_i - 0.5 * (*q_L + *q_R)) < -(*q_R - *q_L) * (*q_R - *q_L)) { + *q_R = 3.0 * q_i - 2.0 * (*q_L); + } + *q_L = fmax(fmin(q_i, q_imo), *q_L); + *q_L = fmin(fmax(q_i, q_imo), *q_L); + *q_R = fmax(fmin(q_i, q_ipo), *q_R); + *q_R = fmin(fmax(q_i, q_ipo), *q_R); } - /*! \fn calc_d2_rho - * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) */ + * \brief Returns the second derivative of rho across zone i. 
(Fryxell Eqn 35) + */ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx) { - return (1. / (6*dx*dx)) * (rho_ipo - 2*rho_i + rho_imo); + return (1. / (6 * dx * dx)) * (rho_ipo - 2 * rho_i + rho_imo); } - /*! \fn calc_eta * \brief Returns a dimensionless quantity relating the 1st and 3rd derivatives See Fryxell Eqn 36. */ @@ -702,13 +766,10 @@ __device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, { Real A, B; - A = (d2rho_ipo - d2rho_imo)*dx*dx; + A = (d2rho_ipo - d2rho_imo) * dx * dx; B = 1.0 / (rho_ipo - rho_imo); return -A * B; } - - -#endif //PPMP -#endif //CUDA +#endif // PPMP diff --git a/src/reconstruction/ppmp_cuda.h b/src/reconstruction/ppmp_cuda.h index c8a85711e..064d328fa 100644 --- a/src/reconstruction/ppmp_cuda.h +++ b/src/reconstruction/ppmp_cuda.h @@ -1,30 +1,35 @@ /*! \file ppmp_cuda.h * \brief Declarations of the cuda ppmp kernels. */ -#ifdef CUDA - #ifndef PPMP_CUDA_H #define PPMP_CUDA_H - #include "../global/global.h" -/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields) - * \brief When passed a stencil of conserved variables, returns the left and right - boundary values for the interface calculated using ppm with limiting in the primitive variables. */ -__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); +/*! \fn __global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real + *dev_bounds_R, int nx, int ny, int nz, int n_ghost, Real dx, Real dt, Real + gamma, int dir, int n_fields) + * \brief When passed a stencil of conserved variables, returns the left and + right boundary values for the interface calculated using ppm with limiting in + the primitive variables. 
*/ +__global__ void PPMP_cuda(Real *dev_conserved, Real *dev_bounds_L, Real *dev_bounds_R, int nx, int ny, int nz, + int n_ghost, Real dx, Real dt, Real gamma, int dir, int n_fields); /*! \fn __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo) * \brief Calculates the limited slope across a cell.*/ __device__ Real Calculate_Slope(Real q_imo, Real q_i, Real q_ipo); -/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real *q_L, Real *q_R) - * \brief Calculates the left and right interface values for a cell using parabolic reconstruction - in the primitive variables with limited slopes provided. Applies further monotonicity constraints.*/ -__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R); +/*! \fn __device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, + Real del_q_imo, Real del_q_i, Real del_q_ipo, Real *q_L, Real *q_R) + * \brief Calculates the left and right interface values for a cell using + parabolic reconstruction in the primitive variables with limited slopes + provided. Applies further monotonicity constraints.*/ +__device__ void Interface_Values_PPM(Real q_imo, Real q_i, Real q_ipo, Real del_q_imo, Real del_q_i, Real del_q_ipo, + Real *q_L, Real *q_R); /*! \fn calc_d2_rho - * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) */ + * \brief Returns the second derivative of rho across zone i. (Fryxell Eqn 35) + */ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx); /*! \fn calc_eta @@ -32,5 +37,4 @@ __device__ Real calc_d2_rho(Real rho_imo, Real rho_i, Real rho_ipo, Real dx); See Fryxell Eqn 36. 
*/ __device__ Real calc_eta(Real d2rho_imo, Real d2rho_ipo, Real dx, Real rho_imo, Real rho_ipo); -#endif // PPMP_CUDA_H -#endif // CUDA +#endif // PPMP_CUDA_H diff --git a/src/reconstruction/reconstruction.h b/src/reconstruction/reconstruction.h new file mode 100644 index 000000000..23442a776 --- /dev/null +++ b/src/reconstruction/reconstruction.h @@ -0,0 +1,970 @@ +/*! + * \file reconstruction.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contain the various structs and device functions needed for interface reconstruction + * + */ + +#pragma once + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/mhd_utilities.h" + +/*! + * \brief Namespace to contain various utilities for the interface reconstruction kernels + * + */ +namespace reconstruction +{ +// ===================================================================================================================== +/*! 
+ * \brief A struct for the primitive variables + * + */ +struct Primitive { + // Hydro variables + Real density, velocity_x, velocity_y, velocity_z, pressure; + +#ifdef MHD + // These are all cell centered values + Real magnetic_x, magnetic_y, magnetic_z; +#endif // MHD + +#ifdef DE + Real gas_energy; +#endif // DE + +#ifdef SCALAR + Real scalar[grid_enum::nscalars]; +#endif // SCALAR +}; +// ===================================================================================================================== + +// ===================================================================================================================== +struct EigenVecs { + Real magnetosonic_speed_fast, magnetosonic_speed_slow, magnetosonic_speed_fast_squared, + magnetosonic_speed_slow_squared; + Real alpha_fast, alpha_slow; + Real beta_y, beta_z; + Real n_fs, sign; + /// The non-primed values are used in the conversion from characteristic to primitive variables + Real q_fast, q_slow; + Real a_fast, a_slow; + /// The primed values are used in the conversion from primitive to characteristic variables + Real q_prime_fast, q_prime_slow; + Real a_prime_fast, a_prime_slow; +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief A struct for the characteristic variables + * + */ +struct Characteristic { + // Hydro variables + Real a0, a1, a2, a3, a4; + +#ifdef MHD + Real a5, a6; +#endif // MHD +}; +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Determine if a thread is within the allowed range + * + * \tparam order The order of the reconstruction. 
2 for PLM, 3 for PPM + * \param nx The number of cells in the X-direction + * \param ny The number of cells in the Y-direction + * \param nz The number of cells in the Z-direction + * \param xid The X thread index + * \param yid The Y thread index + * \param zid The Z thread index + * \return true The thread is NOT in the allowed range + * \return false The thread is in the allowed range + */ +template +bool __device__ __host__ __inline__ Thread_Guard(int const &nx, int const &ny, int const &nz, int const &xid, + int const &yid, int const &zid) +{ + // These checks all make sure that the xid is such that the thread won't try to load any memory that is out of bounds + + // X check + bool out_of_bounds_thread = xid < order - 1 or xid >= nx - order; + + // Y check, only used for 2D and 3D + if (ny > 1) { + out_of_bounds_thread = yid < order - 1 or yid >= ny - order or out_of_bounds_thread; + } + + // z check, only used for 3D + if (nz > 1) { + out_of_bounds_thread = zid < order - 1 or zid >= nz - order or out_of_bounds_thread; + } + // This is needed in the case that nz == 1 to avoid overrun + else { + out_of_bounds_thread = zid >= nz or out_of_bounds_thread; + } + + return out_of_bounds_thread; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! 
+ * \brief Load the data for reconstruction + * + * \param[in] dev_conserved The conserved array + * \param[in] xid The xid of the cell to load data from + * \param[in] yid The yid of the cell to load data from + * \param[in] zid The zid of the cell to load data from + * \param[in] nx Size in the X direction + * \param[in] ny Size in the Y direction + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index + * \return Primitive The loaded cell data + */ +Primitive __device__ __host__ __inline__ Load_Data(Real const *dev_conserved, size_t const &xid, size_t const &yid, + size_t const &zid, size_t const &nx, size_t const &ny, + size_t const &n_cells, size_t const &o1, size_t const &o2, + size_t const &o3, Real const &gamma) +{ // Compute index + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + + // Declare the variable we will return + Primitive loaded_data; + + // Load hydro variables except pressure + loaded_data.density = dev_conserved[grid_enum::density * n_cells + id]; + loaded_data.velocity_x = dev_conserved[o1 * n_cells + id] / loaded_data.density; + loaded_data.velocity_y = dev_conserved[o2 * n_cells + id] / loaded_data.density; + loaded_data.velocity_z = dev_conserved[o3 * n_cells + id] / loaded_data.density; + + // Load MHD variables. 
Note that I only need the centered values for the transverse fields except for the initial + // computation of the primitive variables +#ifdef MHD + auto magnetic_centered = mhd::utils::cellCenteredMagneticFields(dev_conserved, id, xid, yid, zid, n_cells, nx, ny); + switch (o1) { + case grid_enum::momentum_x: + loaded_data.magnetic_x = magnetic_centered.x; + loaded_data.magnetic_y = magnetic_centered.y; + loaded_data.magnetic_z = magnetic_centered.z; + break; + case grid_enum::momentum_y: + loaded_data.magnetic_x = magnetic_centered.y; + loaded_data.magnetic_y = magnetic_centered.z; + loaded_data.magnetic_z = magnetic_centered.x; + break; + case grid_enum::momentum_z: + loaded_data.magnetic_x = magnetic_centered.z; + loaded_data.magnetic_y = magnetic_centered.x; + loaded_data.magnetic_z = magnetic_centered.y; + break; + } +#endif // MHD + +// Load pressure accounting for duel energy if enabled +#ifdef DE // DE + Real const E = dev_conserved[grid_enum::Energy * n_cells + id]; + Real const gas_energy = dev_conserved[grid_enum::GasEnergy * n_cells + id]; + + Real E_non_thermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + loaded_data.density, loaded_data.velocity_x, loaded_data.velocity_y, loaded_data.velocity_z); + + #ifdef MHD + E_non_thermal += mhd::utils::computeMagneticEnergy(magnetic_centered.x, magnetic_centered.y, magnetic_centered.z); + #endif // MHD + + loaded_data.pressure = hydro_utilities::Get_Pressure_From_DE(E, E - E_non_thermal, gas_energy, gamma); + loaded_data.gas_energy = gas_energy / loaded_data.density; +#else // not DE + #ifdef MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma, loaded_data.magnetic_x, loaded_data.magnetic_y, + loaded_data.magnetic_z); + #else // not MHD + loaded_data.pressure = hydro_utilities::Calc_Pressure_Primitive( + 
dev_conserved[grid_enum::Energy * n_cells + id], loaded_data.density, loaded_data.velocity_x, + loaded_data.velocity_y, loaded_data.velocity_z, gamma); + #endif // MHD +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + loaded_data.scalar[i] = dev_conserved[(grid_enum::scalar + i) * n_cells + id] / loaded_data.density; + } +#endif // SCALAR + + return loaded_data; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute a simple slope. Equation is `coef * (right - left)`. + * + * \param[in] left The data with the lower index (on the "left" side) + * \param[in] right The data with the higher index (on the "right" side) + * \param[in] coef The coefficient to multiply the slope by. Defaults to 1.0 + * \return Primitive The slopes + */ +Primitive __device__ __host__ __inline__ Compute_Slope(Primitive const &left, Primitive const &right, + Real const &coef = 1.0) +{ + Primitive slopes; + + slopes.density = coef * (right.density - left.density); + slopes.velocity_x = coef * (right.velocity_x - left.velocity_x); + slopes.velocity_y = coef * (right.velocity_y - left.velocity_y); + slopes.velocity_z = coef * (right.velocity_z - left.velocity_z); + slopes.pressure = coef * (right.pressure - left.pressure); + +#ifdef MHD + slopes.magnetic_y = coef * (right.magnetic_y - left.magnetic_y); + slopes.magnetic_z = coef * (right.magnetic_z - left.magnetic_z); +#endif // MHD + +#ifdef DE + slopes.gas_energy = coef * (right.gas_energy - left.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + slopes.scalar[i] = coef * (right.scalar[i] - left.scalar[i]); + } +#endif // SCALAR + + return slopes; +} +// 
===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the Van Lear slope from the left and right slopes + * + * \param[in] left_slope The left slope + * \param[in] right_slope The right slope + * \return Primitive The Van Leer slope + */ +Primitive __device__ __host__ __inline__ Van_Leer_Slope(Primitive const &left_slope, Primitive const &right_slope) +{ + Primitive vl_slopes; + + auto Calc_Vl_Slope = [](Real const &left, Real const &right) -> Real { + if (left * right > 0.0) { + return 2.0 * left * right / (left + right); + } else { + return 0.0; + } + }; + + vl_slopes.density = Calc_Vl_Slope(left_slope.density, right_slope.density); + vl_slopes.velocity_x = Calc_Vl_Slope(left_slope.velocity_x, right_slope.velocity_x); + vl_slopes.velocity_y = Calc_Vl_Slope(left_slope.velocity_y, right_slope.velocity_y); + vl_slopes.velocity_z = Calc_Vl_Slope(left_slope.velocity_z, right_slope.velocity_z); + vl_slopes.pressure = Calc_Vl_Slope(left_slope.pressure, right_slope.pressure); + +#ifdef MHD + vl_slopes.magnetic_y = Calc_Vl_Slope(left_slope.magnetic_y, right_slope.magnetic_y); + vl_slopes.magnetic_z = Calc_Vl_Slope(left_slope.magnetic_z, right_slope.magnetic_z); +#endif // MHD + +#ifdef DE + vl_slopes.gas_energy = Calc_Vl_Slope(left_slope.gas_energy, right_slope.gas_energy); +#endif // DE + +#ifdef SCALAR + for (size_t i = 0; i < grid_enum::nscalars; i++) { + vl_slopes.scalar[i] = Calc_Vl_Slope(left_slope.scalar[i], right_slope.scalar[i]); + } +#endif // SCALAR + + return vl_slopes; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! 
+ * \brief Compute the eigenvectors in the given cell
+ *
+ * \details This function is only compiled when MHD is defined. Every member of the returned EigenVecs is assigned.
+ *
+ * \param[in] primitive The primitive variables in a particular cell
+ * \param[in] sound_speed The sound speed
+ * \param[in] sound_speed_squared The sound speed squared
+ * \param[in] gamma The adiabatic index
+ * \return EigenVecs The wave speeds, alpha and beta coefficients, and the Q and A terms for this cell
+ */
+#ifdef MHD
+EigenVecs __device__ __inline__ Compute_Eigenvectors(Primitive const &primitive, Real const &sound_speed,
+                                                     Real const &sound_speed_squared, Real const &gamma)
+{
+  EigenVecs output;
+  // This is taken from Stone et al. 2008, appendix A. Equation numbers will be quoted as relevant
+
+  // Compute wave speeds and their squares
+  output.magnetosonic_speed_fast = mhd::utils::fastMagnetosonicSpeed(
+      primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma);
+  output.magnetosonic_speed_slow = mhd::utils::slowMagnetosonicSpeed(
+      primitive.density, primitive.pressure, primitive.magnetic_x, primitive.magnetic_y, primitive.magnetic_z, gamma);
+
+  output.magnetosonic_speed_fast_squared = output.magnetosonic_speed_fast * output.magnetosonic_speed_fast;
+  output.magnetosonic_speed_slow_squared = output.magnetosonic_speed_slow * output.magnetosonic_speed_slow;
+
+  // Compute Alphas (equation A16). The first two branches are fallbacks that avoid taking the square root of a
+  // non-positive ratio: alpha is set to (1, 0) when the denominator or numerator_2 is not positive and to (0, 1) when
+  // numerator_1 is not positive
+  if (Real const denom = (output.magnetosonic_speed_fast_squared - output.magnetosonic_speed_slow_squared),
+      numerator_2      = (output.magnetosonic_speed_fast_squared - sound_speed_squared);
+      denom <= 0.0 or numerator_2 <= 0.0) {
+    output.alpha_fast = 1.0;
+    output.alpha_slow = 0.0;
+  } else if (Real const numerator_1 = (sound_speed_squared - output.magnetosonic_speed_slow_squared);
+             numerator_1 <= 0.0) {
+    output.alpha_fast = 0.0;
+    output.alpha_slow = 1.0;
+  } else {
+    output.alpha_fast = sqrt(numerator_1 / denom);
+    output.alpha_slow = sqrt(numerator_2 / denom);
+  }
+
+  // Compute Betas (equation A17). Note that rhypot can return an inf if By and Bz are both zero, the isfinite check
+  // handles that case by falling back to beta_y = 1 and beta_z = 0
+  Real const beta_denom = rhypot(primitive.magnetic_y, primitive.magnetic_z);
+  output.beta_y         = (isfinite(beta_denom)) ? primitive.magnetic_y * beta_denom : 1.0;
+  output.beta_z         = (isfinite(beta_denom)) ? primitive.magnetic_z * beta_denom : 0.0;
+
+  // Compute Q(s) (equation A14). The primed versions carry the extra factor of n_fs
+  output.sign         = copysign(1.0, primitive.magnetic_x);
+  output.n_fs         = 0.5 / sound_speed_squared;  // equation A19
+  output.q_prime_fast = output.sign * output.n_fs * output.alpha_fast * output.magnetosonic_speed_fast;
+  output.q_prime_slow = output.sign * output.n_fs * output.alpha_slow * output.magnetosonic_speed_slow;
+  output.q_fast       = output.sign * output.alpha_fast * output.magnetosonic_speed_fast;
+  output.q_slow       = output.sign * output.alpha_slow * output.magnetosonic_speed_slow;
+
+  // Compute A(s) (equation A15)
+  output.a_fast       = output.alpha_fast * sound_speed * sqrt(primitive.density);
+  output.a_slow       = output.alpha_slow * sound_speed * sqrt(primitive.density);
+  output.a_prime_fast = 0.5 * output.alpha_fast / (sound_speed * sqrt(primitive.density));
+  output.a_prime_slow = 0.5 * output.alpha_slow / (sound_speed * sqrt(primitive.density));
+
+  return output;
+}
+#endif  // MHD
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Project from the primitive variables slopes to the characteristic variables slopes. Stone Eqn 37. Use the
+ * eigenvectors given in Stone 2008, Appendix A
+ *
+ * \details With MHD defined all seven characteristic slopes (a0-a6) are computed from `eigen`. Without MHD the five
+ * hydro characteristic slopes (a0-a4) are computed and `eigen` is not read. `gamma` is not read in either branch.
+ *
+ * \param[in] primitive The primitive variables
+ * \param[in] primitive_slope The primitive variables slopes
+ * \param[in] eigen The eigenvectors
+ * \param[in] sound_speed The speed of sound
+ * \param[in] sound_speed_squared The speed of sound squared
+ * \param[in] gamma The adiabatic index
+ * \return Characteristic The slopes in the characteristic variables
+ */
+Characteristic __device__ __inline__ Primitive_To_Characteristic(Primitive const &primitive,
+                                                                 Primitive const &primitive_slope,
+                                                                 EigenVecs const &eigen, Real const &sound_speed,
+                                                                 Real const &sound_speed_squared, Real const &gamma)
+{
+  Characteristic output;
+
+#ifdef MHD
+  // Multiply the slopes by the left eigenvector matrix given in equation 18 (presumably A18 of Stone et al. 2008,
+  // Appendix A; TODO confirm the numbering)
+  Real const inverse_sqrt_density = rsqrt(primitive.density);
+  output.a0 =
+      eigen.n_fs * eigen.alpha_fast *
+          (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) +
+      eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) +
+      eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+
+  output.a1 =
+      0.5 *
+      (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_z) -
+       eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density + primitive_slope.velocity_y));
+
+  output.a2 =
+      eigen.n_fs * eigen.alpha_slow *
+          (primitive_slope.pressure / primitive.density - eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) -
+      eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) -
+      eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+
+  output.a3 = primitive_slope.density - primitive_slope.pressure / sound_speed_squared;
+
+  output.a4 =
+      eigen.n_fs * eigen.alpha_slow *
+          (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_slow * primitive_slope.velocity_x) +
+      eigen.q_prime_fast * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) -
+      eigen.a_prime_fast * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+  output.a5 =
+      0.5 *
+      (eigen.beta_y * (primitive_slope.magnetic_z * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_z) -
+       eigen.beta_z * (primitive_slope.magnetic_y * eigen.sign * inverse_sqrt_density - primitive_slope.velocity_y));
+
+  output.a6 =
+      eigen.n_fs * eigen.alpha_fast *
+          (primitive_slope.pressure / primitive.density + eigen.magnetosonic_speed_fast * primitive_slope.velocity_x) -
+      eigen.q_prime_slow * (eigen.beta_y * primitive_slope.velocity_y + eigen.beta_z * primitive_slope.velocity_z) +
+      eigen.a_prime_slow * (eigen.beta_y * primitive_slope.magnetic_y + eigen.beta_z * primitive_slope.magnetic_z);
+
+#else   // not MHD
+  // Pure hydro projection, only the sound speed and the cell density are needed
+  output.a0 = -primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) +
+              primitive_slope.pressure / (2.0 * sound_speed_squared);
+  output.a1 = primitive_slope.density - primitive_slope.pressure / (sound_speed_squared);
+  output.a2 = primitive_slope.velocity_y;
+  output.a3 = primitive_slope.velocity_z;
+  output.a4 = primitive.density * primitive_slope.velocity_x / (2.0 * sound_speed) +
+              primitive_slope.pressure / (2.0 * sound_speed_squared);
+#endif  // MHD
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Project from the characteristic variables slopes to the primitive variables slopes. Stone Eqn 39. Use the
+ * eigenvectors given in Stone 2008, Appendix A
+ *
+ * \details Only density, the velocities, pressure and, with MHD, magnetic_y and magnetic_z are assigned. magnetic_x,
+ * gas_energy, and the scalars are left unassigned in the returned struct. `gamma` is not read, and without MHD
+ * `eigen` is not read either.
+ *
+ * \param[in] primitive The primitive variables
+ * \param[in] characteristic_slope The characteristic slopes
+ * \param[in] eigen The eigenvectors
+ * \param[in] sound_speed The sound speed
+ * \param[in] sound_speed_squared The sound speed squared
+ * \param[in] gamma The adiabatic index
+ * \return Primitive The slopes in primitive variables
+ */
+Primitive __device__ __host__ __inline__ Characteristic_To_Primitive(Primitive const &primitive,
+                                                                     Characteristic const &characteristic_slope,
+                                                                     EigenVecs const &eigen, Real const &sound_speed,
+                                                                     Real const &sound_speed_squared, Real const &gamma)
+{
+  Primitive output;
+#ifdef MHD
+  // Multiply the slopes by the right eigenvector matrix given in equation 12 (presumably A12 of Stone et al. 2008,
+  // Appendix A; TODO confirm the numbering)
+  output.density = primitive.density * (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) +
+                                        eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4)) +
+                   characteristic_slope.a3;
+  output.velocity_x =
+      eigen.magnetosonic_speed_fast * eigen.alpha_fast * (characteristic_slope.a6 - characteristic_slope.a0) +
+      eigen.magnetosonic_speed_slow * eigen.alpha_slow * (characteristic_slope.a4 - characteristic_slope.a2);
+  output.velocity_y = eigen.beta_y * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) +
+                                      eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) +
+                      eigen.beta_z * (characteristic_slope.a5 - characteristic_slope.a1);
+  output.velocity_z = eigen.beta_z * (eigen.q_slow * (characteristic_slope.a0 - characteristic_slope.a6) +
+                                      eigen.q_fast * (characteristic_slope.a4 - characteristic_slope.a2)) +
+                      eigen.beta_y * (characteristic_slope.a1 - characteristic_slope.a5);
+  output.pressure = primitive.density * sound_speed_squared *
+                    (eigen.alpha_fast * (characteristic_slope.a0 + characteristic_slope.a6) +
+                     eigen.alpha_slow * (characteristic_slope.a2 + characteristic_slope.a4));
+  output.magnetic_y =
+      eigen.beta_y * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) -
+                      eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) -
+      eigen.beta_z * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1);
+  output.magnetic_z =
+      eigen.beta_z * (eigen.a_slow * (characteristic_slope.a0 + characteristic_slope.a6) -
+                      eigen.a_fast * (characteristic_slope.a2 + characteristic_slope.a4)) +
+      eigen.beta_y * eigen.sign * sqrt(primitive.density) * (characteristic_slope.a5 + characteristic_slope.a1);
+
+#else   // not MHD
+  // Pure hydro projection, the inverse of the non-MHD branch of Primitive_To_Characteristic
+  output.density    = characteristic_slope.a0 + characteristic_slope.a1 + characteristic_slope.a4;
+  output.velocity_x = sound_speed / primitive.density * (characteristic_slope.a4 - characteristic_slope.a0);
+  output.velocity_y = characteristic_slope.a2;
+  output.velocity_z = characteristic_slope.a3;
+  output.pressure   = sound_speed_squared * (characteristic_slope.a0 + characteristic_slope.a4);
+#endif  // MHD
+
+  return output;
+}
+// =====================================================================================================================
+
+// =====================================================================================================================
+/*!
+ * \brief Monotonize the characteristic slopes and project back into the primitive slopes + * + * \param[in] primitive The primitive variables + * \param[in] del_L The left primitive slopes + * \param[in] del_R The right primitive slopes + * \param[in] del_C The centered primitive slopes + * \param[in] del_G The Van Leer primitive slopes + * \param[in] del_a_L The left characteristic slopes + * \param[in] del_a_R The right characteristic slopes + * \param[in] del_a_C The centered characteristic slopes + * \param[in] del_a_G The Van Leer characteristic slopes + * \param[in] sound_speed The sound speed + * \param[in] sound_speed_squared The sound speed squared + * \param[in] gamma The adiabatic index + * \return Primitive The Monotonized primitive slopes + */ +Primitive __device__ __inline__ Monotonize_Characteristic_Return_Primitive( + Primitive const &primitive, Primitive const &del_L, Primitive const &del_R, Primitive const &del_C, + Primitive const &del_G, Characteristic const &del_a_L, Characteristic const &del_a_R, Characteristic const &del_a_C, + Characteristic const &del_a_G, EigenVecs const &eigenvectors, Real const &sound_speed, + Real const &sound_speed_squared, Real const &gamma) +{ + // The function that will actually do the monotozation + auto Monotonize = [](Real const &left, Real const &right, Real const ¢ered, Real const &van_leer) -> Real { + if (left * right > 0.0) { + Real const lim_slope_a = 2.0 * fmin(fabs(left), fabs(right)); + Real const lim_slope_b = fmin(fabs(centered), fabs(van_leer)); + return copysign(fmin(lim_slope_a, lim_slope_b), centered); + } else { + return 0.0; + } + }; + + // the monotonized difference in the characteristic variables + Characteristic del_a_m; + + // Monotonize the slopes + del_a_m.a0 = Monotonize(del_a_L.a0, del_a_R.a0, del_a_C.a0, del_a_G.a0); + del_a_m.a1 = Monotonize(del_a_L.a1, del_a_R.a1, del_a_C.a1, del_a_G.a1); + del_a_m.a2 = Monotonize(del_a_L.a2, del_a_R.a2, del_a_C.a2, del_a_G.a2); + del_a_m.a3 = 
Monotonize(del_a_L.a3, del_a_R.a3, del_a_C.a3, del_a_G.a3); + del_a_m.a4 = Monotonize(del_a_L.a4, del_a_R.a4, del_a_C.a4, del_a_G.a4); + +#ifdef MHD + del_a_m.a5 = Monotonize(del_a_L.a5, del_a_R.a5, del_a_C.a5, del_a_G.a5); + del_a_m.a6 = Monotonize(del_a_L.a6, del_a_R.a6, del_a_C.a6, del_a_G.a6); +#endif // MHD + + // Project into the primitive variables. Note the return by reference to preserve the values in the gas_energy and + // scalars + Primitive output = + Characteristic_To_Primitive(primitive, del_a_m, eigenvectors, sound_speed, sound_speed_squared, gamma); + +#ifdef DE + output.gas_energy = Monotonize(del_L.gas_energy, del_R.gas_energy, del_C.gas_energy, del_G.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = Monotonize(del_L.scalar[i], del_R.scalar[i], del_C.scalar[i], del_G.scalar[i]); + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Monotonize the parabolic interface states + * + * \param[in] cell_i The state in cell i + * \param[in] cell_im1 The state in cell i-1 + * \param[in] cell_ip1 The state in cell i+1 + * \param[in,out] interface_L_iph The left interface state at i+1/2 + * \param[in,out] interface_R_imh The right interface state at i-1/2 + * \return Primitive + */ +void __device__ __host__ __inline__ Monotonize_Parabolic_Interface(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &cell_ip1, + Primitive &interface_L_iph, + Primitive &interface_R_imh) +{ + // The function that will actually do the monotozation. 
Note the return by refernce of the interface state + auto Monotonize = [](Real const &state_i, Real const &state_im1, Real const &state_ip1, Real &interface_L, + Real &interface_R) { + // Some terms we need for the comparisons + Real const term_1 = 6.0 * (interface_L - interface_R) * (state_i - 0.5 * (interface_R + interface_L)); + Real const term_2 = pow(interface_L - interface_R, 2.0); + + // First monotonicity constraint. Equations 47-49 in Stone et al. 2008 + if ((interface_L - state_i) * (state_i - interface_R) <= 0.0) { + interface_L = state_i; + interface_R = state_i; + } + // Second monotonicity constraint. Equations 50 & 51 in Stone et al. 2008 + else if (term_1 > term_2) { + interface_R = 3.0 * state_i - 2.0 * interface_L; + } + // Third monotonicity constraint. Equations 52 & 53 in Stone et al. 2008 + else if (term_1 < -term_2) { + interface_L = 3.0 * state_i - 2.0 * interface_R; + } + + // Bound the interface to lie between adjacent cell centered values + interface_R = fmax(fmin(state_i, state_im1), interface_R); + interface_R = fmin(fmax(state_i, state_im1), interface_R); + interface_L = fmax(fmin(state_i, state_ip1), interface_L); + interface_L = fmin(fmax(state_i, state_ip1), interface_L); + }; + + // Monotonize each interface state + Monotonize(cell_i.density, cell_im1.density, cell_ip1.density, interface_L_iph.density, interface_R_imh.density); + Monotonize(cell_i.velocity_x, cell_im1.velocity_x, cell_ip1.velocity_x, interface_L_iph.velocity_x, + interface_R_imh.velocity_x); + Monotonize(cell_i.velocity_y, cell_im1.velocity_y, cell_ip1.velocity_y, interface_L_iph.velocity_y, + interface_R_imh.velocity_y); + Monotonize(cell_i.velocity_z, cell_im1.velocity_z, cell_ip1.velocity_z, interface_L_iph.velocity_z, + interface_R_imh.velocity_z); + Monotonize(cell_i.pressure, cell_im1.pressure, cell_ip1.pressure, interface_L_iph.pressure, interface_R_imh.pressure); + +#ifdef MHD + Monotonize(cell_i.magnetic_y, cell_im1.magnetic_y, cell_ip1.magnetic_y, 
interface_L_iph.magnetic_y, + interface_R_imh.magnetic_y); + Monotonize(cell_i.magnetic_z, cell_im1.magnetic_z, cell_ip1.magnetic_z, interface_L_iph.magnetic_z, + interface_R_imh.magnetic_z); +#endif // MHD + +#ifdef DE + Monotonize(cell_i.gas_energy, cell_im1.gas_energy, cell_ip1.gas_energy, interface_L_iph.gas_energy, + interface_R_imh.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + Monotonize(cell_i.scalar[i], cell_im1.scalar[i], cell_ip1.scalar[i], interface_L_iph.scalar[i], + interface_R_imh.scalar[i]); + } +#endif // SCALAR +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the interface state from the slope and cell centered state using linear interpolation + * + * \param[in] primitive The cell centered state + * \param[in] slopes The slopes + * \param[in] sign Whether to add or subtract the slope. 
+1 to add it and -1 to subtract it + * \return Primitive The interface state + */ +Primitive __device__ __host__ __inline__ Calc_Interface_Linear(Primitive const &primitive, Primitive const &slopes, + Real const &sign) +{ + Primitive output; + + auto interface = [&sign](Real const &state, Real const &slope) -> Real { return state + sign * 0.5 * slope; }; + + output.density = interface(primitive.density, slopes.density); + output.velocity_x = interface(primitive.velocity_x, slopes.velocity_x); + output.velocity_y = interface(primitive.velocity_y, slopes.velocity_y); + output.velocity_z = interface(primitive.velocity_z, slopes.velocity_z); + output.pressure = interface(primitive.pressure, slopes.pressure); + +#ifdef MHD + output.magnetic_y = interface(primitive.magnetic_y, slopes.magnetic_y); + output.magnetic_z = interface(primitive.magnetic_z, slopes.magnetic_z); +#endif // MHD + +#ifdef DE + output.gas_energy = interface(primitive.gas_energy, slopes.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = interface(primitive.scalar[i], slopes.scalar[i]); + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! 
+ * \brief Apply limiting the the primitive interfaces in PLM reconstructions + * + * \param[in,out] interface_L_iph The unlimited left plus 1/2 interface + * \param[in,out] interface_R_imh The unlimited right minus 1/2 interface + * \param[in] cell_imo The cell centered values at i-1 + * \param[in] cell_i The cell centered values at i + * \param[in] cell_ipo The cell centered values at i+1 + */ +void __device__ __host__ __inline__ Plm_Limit_Interfaces(Primitive &interface_L_iph, Primitive &interface_R_imh, + Primitive const &cell_imo, Primitive const &cell_i, + Primitive const &cell_ipo) +{ + auto limiter = [](Real &l_iph, Real &r_imh, Real const &val_imo, Real const &val_i, Real const &val_ipo) { + r_imh = fmax(fmin(val_i, val_imo), r_imh); + r_imh = fmin(fmax(val_i, val_imo), r_imh); + l_iph = fmax(fmin(val_i, val_ipo), l_iph); + l_iph = fmin(fmax(val_i, val_ipo), l_iph); + }; + + limiter(interface_L_iph.density, interface_R_imh.density, cell_imo.density, cell_i.density, cell_ipo.density); + limiter(interface_L_iph.velocity_x, interface_R_imh.velocity_x, cell_imo.velocity_x, cell_i.velocity_x, + cell_ipo.velocity_x); + limiter(interface_L_iph.velocity_y, interface_R_imh.velocity_y, cell_imo.velocity_y, cell_i.velocity_y, + cell_ipo.velocity_y); + limiter(interface_L_iph.velocity_z, interface_R_imh.velocity_z, cell_imo.velocity_z, cell_i.velocity_z, + cell_ipo.velocity_z); + limiter(interface_L_iph.pressure, interface_R_imh.pressure, cell_imo.pressure, cell_i.pressure, cell_ipo.pressure); + +#ifdef MHD + limiter(interface_L_iph.magnetic_y, interface_R_imh.magnetic_y, cell_imo.magnetic_y, cell_i.magnetic_y, + cell_ipo.magnetic_y); + limiter(interface_L_iph.magnetic_z, interface_R_imh.magnetic_z, cell_imo.magnetic_z, cell_i.magnetic_z, + cell_ipo.magnetic_z); +#endif // MHD + +#ifdef DE + limiter(interface_L_iph.gas_energy, interface_R_imh.gas_energy, cell_imo.gas_energy, cell_i.gas_energy, + cell_ipo.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < 
NSCALARS; i++) { + limiter(interface_L_iph.scalar[i], interface_R_imh.scalar[i], cell_imo.scalar[i], cell_i.scalar[i], + cell_ipo.scalar[i]); + } +#endif // SCALAR +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the interface state for the CTU version of the reconstructor from the slope and cell centered state + * using parabolic interpolation + * + * \param[in] cell_i The state in cell i + * \param[in] cell_im1 The state in cell i-1 + * \param[in] slopes_i The slopes in cell i + * \param[in] slopes_im1 The slopes in cell i-1 + * \return Primitive The interface state + */ +Primitive __device__ __host__ __inline__ Calc_Interface_Parabolic(Primitive const &cell_i, Primitive const &cell_im1, + Primitive const &slopes_i, + Primitive const &slopes_im1) +{ + Primitive output; + + auto interface = [](Real const &state_i, Real const &state_im1, Real const &slope_i, Real const &slope_im1) -> Real { + return 0.5 * (state_i + state_im1) - (slope_i - slope_im1) / 6.0; + }; + + output.density = interface(cell_i.density, cell_im1.density, slopes_i.density, slopes_im1.density); + output.velocity_x = interface(cell_i.velocity_x, cell_im1.velocity_x, slopes_i.velocity_x, slopes_im1.velocity_x); + output.velocity_y = interface(cell_i.velocity_y, cell_im1.velocity_y, slopes_i.velocity_y, slopes_im1.velocity_y); + output.velocity_z = interface(cell_i.velocity_z, cell_im1.velocity_z, slopes_i.velocity_z, slopes_im1.velocity_z); + output.pressure = interface(cell_i.pressure, cell_im1.pressure, slopes_i.pressure, slopes_im1.pressure); + +#ifdef MHD + output.magnetic_y = interface(cell_i.magnetic_y, cell_im1.magnetic_y, slopes_i.magnetic_y, slopes_im1.magnetic_y); + output.magnetic_z = interface(cell_i.magnetic_z, cell_im1.magnetic_z, slopes_i.magnetic_z, 
slopes_im1.magnetic_z); +#endif // MHD + +#ifdef DE + output.gas_energy = interface(cell_i.gas_energy, cell_im1.gas_energy, slopes_i.gas_energy, slopes_im1.gas_energy); +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + output.scalar[i] = interface(cell_i.scalar[i], cell_im1.scalar[i], slopes_i.scalar[i], slopes_im1.scalar[i]); + } +#endif // SCALAR + + return output; +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Compute the PPM interface state for a given field/stencil. + * + * \details This method is heavily based on the implementation in Athena++. See the following papers for details + * - K. Felker & J. Stone, "A fourth-order accurate finite volume method for ideal MHD via upwind constrained + * transport", JCP, 375, (2018) + * - P. Colella & P. Woodward, "The Piecewise Parabolic Method (PPM) for Gas-Dynamical Simulations", JCP, 54, 174 + * (1984) + * - P. Colella & M. Sekora, "A limiter for PPM that preserves accuracy at smooth extrema", JCP, 227, 7069 (2008) + * - P. McCorquodale & P. Colella, "A high-order finite-volume method for conservation laws on locally refined + * grids", CAMCoS, 6, 1 (2011) + * - P. Colella, M.R. Dorr, J. Hittinger, D. 
Martin, "High-order, finite-volume methods in mapped coordinates", JCP, + * 230, 2952 (2011) + * + * \param[in] cell_im2 The value of the field/stencil at i-2 + * \param[in] cell_im1 The value of the field/stencil at i-1 + * \param[in] cell_i The value of the field/stencil at i + * \param[in] cell_ip1 The value of the field/stencil at i+1 + * \param[in] cell_ip2 The value of the field/stencil at i+2 + * \param[out] interface_L_iph The left interface at the i+1/2 face + * \param[out] interface_R_imh The right interface at the i-1/2 face + */ +void __device__ __host__ __inline__ PPM_Single_Variable(Real const &cell_im2, Real const &cell_im1, Real const &cell_i, + Real const &cell_ip1, Real const &cell_ip2, + Real &interface_L_iph, Real &interface_R_imh) +{ + // Let's start by setting up some things that we'll need later + + // Colella & Sekora 2008 constant used in second derivative limiter + Real const C2 = 1.25; + + // This lambda function is used for limiting the interfaces + auto limit_interface = [&C2](Real const &cell_i, Real const &cell_im1, Real const &interface, Real const &slope_2nd_i, + Real const &slope_2nd_im1) -> Real { + // Colella et al. 2011 eq. 85b. + // 85a is slope_2nd_im1 and 85c is slope_2nd_i + Real slope_2nd_centered = 3.0 * (cell_im1 + cell_i - 2.0 * interface); + + Real limited_slope = 0.0; + if (SIGN(slope_2nd_centered) == SIGN(slope_2nd_im1) and SIGN(slope_2nd_centered) == SIGN(slope_2nd_i)) { + limited_slope = SIGN(slope_2nd_centered) * + fmin(C2 * abs(slope_2nd_im1), fmin(C2 * abs(slope_2nd_i), abs(slope_2nd_centered))); + } + + // Colella et al. 2011 eq. 
84a & 84b + Real const diff_left = interface - cell_im1; + Real const diff_right = cell_i - interface; + if (diff_left * diff_right < 0.0) { + // Local extrema detected at the interface + return 0.5 * (cell_im1 + cell_i) - limited_slope / 6.0; + } else { + return interface; + } + }; + + // Now that the setup is done we can start computing the interface states + + // Compute average slopes + Real const slope_left = (cell_i - cell_im1); + Real const slope_right = (cell_ip1 - cell_i); + Real const slope_avg_im1 = 0.5 * slope_left + 0.5 * (cell_im1 - cell_im2); + Real const slope_avg_i = 0.5 * slope_right + 0.5 * slope_left; + Real const slope_avg_ip1 = 0.5 * (cell_ip2 - cell_ip1) + 0.5 * slope_right; + + // Approximate interface average at i-1/2 and i+1/2 using PPM + // P. Colella & P. Woodward 1984 eq. 1.6 + interface_R_imh = 0.5 * (cell_im1 + cell_i) + (slope_avg_im1 - slope_avg_i) / 6.0; + interface_L_iph = 0.5 * (cell_i + cell_ip1) + (slope_avg_i - slope_avg_ip1) / 6.0; + + // Limit interpolated interface states (Colella et al. 2011 section 4.3.1) + + // Approximate second derivative at interfaces for smooth extrema preservation + // Colella et al. 
2011 eq 85a + Real const slope_2nd_im1 = cell_im2 + cell_i - 2.0 * cell_im1; + Real const slope_2nd_i = cell_im1 + cell_ip1 - 2.0 * cell_i; + Real const slope_2nd_ip1 = cell_i + cell_ip2 - 2.0 * cell_ip1; + + interface_R_imh = limit_interface(cell_i, cell_im1, interface_R_imh, slope_2nd_i, slope_2nd_im1); + interface_L_iph = limit_interface(cell_ip1, cell_i, interface_L_iph, slope_2nd_ip1, slope_2nd_i); + + // Compute cell-centered difference stencils (McCorquodale & Colella 2011 section 2.4.1) + + // Apply Colella & Sekora limiters to parabolic interpolant + Real slope_2nd_face = 6.0 * (interface_R_imh + interface_L_iph - 2.0 * cell_i); + + Real slope_2nd_limited = 0.0; + if (SIGN(slope_2nd_im1) == SIGN(slope_2nd_i) and SIGN(slope_2nd_im1) == SIGN(slope_2nd_ip1) and + SIGN(slope_2nd_im1) == SIGN(slope_2nd_face)) { + // Extrema is smooth + // Colella & Sekora eq. 22 + slope_2nd_limited = SIGN(slope_2nd_face) * fmin(fmin(C2 * abs(slope_2nd_im1), C2 * abs(slope_2nd_i)), + fmin(C2 * abs(slope_2nd_ip1), abs(slope_2nd_face))); + } + + // Check if 2nd derivative is close to roundoff error + Real cell_max = fmax(abs(cell_im2), abs(cell_im1)); + cell_max = fmax(cell_max, abs(cell_i)); + cell_max = fmax(cell_max, abs(cell_ip1)); + cell_max = fmax(cell_max, abs(cell_ip2)); + + // If this condition is true then the limiter is not sensitive to roundoff and we use the limited ratio + // McCorquodale & Colella 2011 eq. 27 + Real const rho = (abs(slope_2nd_face) > (1.0e-12) * cell_max) ? slope_2nd_limited / slope_2nd_face : 0.0; + + // Colella & Sekora eq. 25 + Real slope_face_left = cell_i - interface_R_imh; + Real slope_face_right = interface_L_iph - cell_i; + + // Check for local extrema + if ((slope_face_left * slope_face_right) <= 0.0 or ((cell_ip1 - cell_i) * (cell_i - cell_im1)) <= 0.0) { + // Extrema detected + // Check if relative change in limited 2nd deriv is > roundoff + if (rho <= (1.0 - (1.0e-12))) { + // Limit smooth extrema + // Colella & Sekora eq. 
23 + interface_R_imh = cell_i - rho * slope_face_left; + interface_L_iph = cell_i + rho * slope_face_right; + } + } else { + // No extrema detected + // Overshoot i-1/2,R / i,(-) state + if (abs(slope_face_left) >= 2.0 * abs(slope_face_right)) { + interface_R_imh = cell_i - 2.0 * slope_face_right; + } + // Overshoot i+1/2,L / i,(+) state + if (abs(slope_face_right) >= 2.0 * abs(slope_face_left)) { + interface_L_iph = cell_i + 2.0 * slope_face_left; + } + } +} +// ===================================================================================================================== + +// ===================================================================================================================== +/*! + * \brief Write the interface data to the appropriate arrays + * + * \param[in] interface_state The interface state to write + * \param[out] dev_interface The interface array + * \param[in] dev_conserved The conserved variables + * \param[in] id The cell id to write to + * \param[in] n_cells The total number of cells + * \param[in] o1 Directional parameter + * \param[in] o2 Directional parameter + * \param[in] o3 Directional parameter + * \param[in] gamma The adiabatic index + */ +void __device__ __host__ __inline__ Write_Data(Primitive const &interface_state, Real *dev_interface, + Real const *dev_conserved, size_t const &id, size_t const &n_cells, + size_t const &o1, size_t const &o2, size_t const &o3, Real const &gamma) +{ + // Write out density and momentum + dev_interface[grid_enum::density * n_cells + id] = interface_state.density; + dev_interface[o1 * n_cells + id] = interface_state.density * interface_state.velocity_x; + dev_interface[o2 * n_cells + id] = interface_state.density * interface_state.velocity_y; + dev_interface[o3 * n_cells + id] = interface_state.density * interface_state.velocity_z; + +#ifdef MHD + // Write the Y and Z interface states and load the X magnetic face needed to compute the energy + Real magnetic_x; + switch (o1) { + case 
grid_enum::momentum_x: + dev_interface[grid_enum::Q_x_magnetic_y * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_x_magnetic_z * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_x * n_cells + id]; + break; + case grid_enum::momentum_y: + dev_interface[grid_enum::Q_y_magnetic_z * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_y_magnetic_x * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_y * n_cells + id]; + break; + case grid_enum::momentum_z: + dev_interface[grid_enum::Q_z_magnetic_x * n_cells + id] = interface_state.magnetic_y; + dev_interface[grid_enum::Q_z_magnetic_y * n_cells + id] = interface_state.magnetic_z; + magnetic_x = dev_conserved[grid_enum::magnetic_z * n_cells + id]; + break; + } + + // Compute the MHD energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma, magnetic_x, interface_state.magnetic_y, interface_state.magnetic_z); +#else // not MHD + // Compute the hydro energy + dev_interface[grid_enum::Energy * n_cells + id] = hydro_utilities::Calc_Energy_Primitive( + interface_state.pressure, interface_state.density, interface_state.velocity_x, interface_state.velocity_y, + interface_state.velocity_z, gamma); +#endif // MHD + +#ifdef DE + dev_interface[grid_enum::GasEnergy * n_cells + id] = interface_state.density * interface_state.gas_energy; +#endif // DE +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_interface[(grid_enum::scalar + i) * n_cells + id] = interface_state.density * interface_state.scalar[i]; + } +#endif // SCALAR +} +// ===================================================================================================================== +} // namespace reconstruction diff --git 
a/src/reconstruction/reconstruction_tests.cu b/src/reconstruction/reconstruction_tests.cu new file mode 100644 index 000000000..dc1f10720 --- /dev/null +++ b/src/reconstruction/reconstruction_tests.cu @@ -0,0 +1,682 @@ +/*! + * \file reconstruction_tests.cu + * \brief Tests for the contents of reconstruction.h + * + */ + +// STL Includes +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../io/io.h" +#include "../reconstruction/reconstruction.h" +#include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/testing_utilities.h" + +#ifdef MHD +__global__ void Test_Prim_2_Char(reconstruction::Primitive const primitive, + reconstruction::Primitive const primitive_slope, + reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + Real const sound_speed_squared, reconstruction::Characteristic *characteristic_slope) +{ + *characteristic_slope = reconstruction::Primitive_To_Characteristic(primitive, primitive_slope, eigenvectors, + sound_speed, sound_speed_squared, gamma); +} + +__global__ void Test_Char_2_Prim(reconstruction::Primitive const primitive, + reconstruction::Characteristic const characteristic_slope, + reconstruction::EigenVecs const eigenvectors, Real const gamma, Real const sound_speed, + Real const sound_speed_squared, reconstruction::Primitive *primitive_slope) +{ + *primitive_slope = reconstruction::Characteristic_To_Primitive(primitive, characteristic_slope, eigenvectors, + sound_speed, sound_speed_squared, gamma); +} + +__global__ void Test_Compute_Eigenvectors(reconstruction::Primitive const primitive, Real const sound_speed, + Real const sound_speed_squared, Real const gamma, + reconstruction::EigenVecs *eigenvectors) +{ + *eigenvectors = reconstruction::Compute_Eigenvectors(primitive, sound_speed, 
sound_speed_squared, gamma); +} + +TEST(tMHDReconstructionPrimitive2Characteristic, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. / 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const primitive_slope{9, 10, 11, 12, 13, 14, 15, 16}; + reconstruction::EigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Prim_2_Char, 1, 1, 0, 0, primitive, primitive_slope, eigenvectors, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + reconstruction::Characteristic const host_results = dev_results.at(0); + + // Check results + reconstruction::Characteristic const fiducial_results{-40327, 110, -132678, 7.4400000000000004, 98864, 98, 103549}; + testing_utilities::Check_Results(fiducial_results.a0, host_results.a0, "a0"); + testing_utilities::Check_Results(fiducial_results.a1, host_results.a1, "a1"); + testing_utilities::Check_Results(fiducial_results.a2, host_results.a2, "a2"); + testing_utilities::Check_Results(fiducial_results.a3, host_results.a3, "a3"); + testing_utilities::Check_Results(fiducial_results.a4, host_results.a4, "a4"); + testing_utilities::Check_Results(fiducial_results.a5, host_results.a5, "a5"); + testing_utilities::Check_Results(fiducial_results.a6, host_results.a6, "a6"); +} + +TEST(tMHDReconstructionCharacteristic2Primitive, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. 
/ 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Characteristic const characteristic_slope{17, 18, 19, 20, 21, 22, 23}; + reconstruction::EigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Char_2_Prim, 1, 1, 0, 0, primitive, characteristic_slope, eigenvectors, gamma, sound_speed, + sound_speed_squared, dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results + reconstruction::Primitive const fiducial_results{1740, 2934, -2526, -2828, 14333.333333333338, 0.0, -24040, 24880}; + testing_utilities::Check_Results(fiducial_results.density, host_results.density, "density"); + testing_utilities::Check_Results(fiducial_results.velocity_x, host_results.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_results.velocity_y, host_results.velocity_y, "velocity_y", 1.34E-14); + testing_utilities::Check_Results(fiducial_results.velocity_z, host_results.velocity_z, "velocity_z", 1.6E-14); + testing_utilities::Check_Results(fiducial_results.pressure, host_results.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_results.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_results.magnetic_z, host_results.magnetic_z, "magnetic_z"); +} + +TEST(tMHDReconstructionComputeEigenvectors, CorrectInputExpectCorrectOutput) +{ + // Test parameters + Real const &gamma = 5. 
/ 3.; + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + Real const sound_speed = hydro_utilities::Calc_Sound_Speed(primitive.pressure, primitive.density, gamma); + Real const sound_speed_squared = sound_speed * sound_speed; + + // Run test + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Compute_Eigenvectors, 1, 1, 0, 0, primitive, sound_speed, sound_speed_squared, gamma, + dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + reconstruction::EigenVecs const host_results = dev_results.at(0); + // std::cout << to_string_exact(host_results.magnetosonic_speed_fast) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_slow) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_fast_squared) << ","; + // std::cout << to_string_exact(host_results.magnetosonic_speed_slow_squared) << ","; + // std::cout << to_string_exact(host_results.alpha_fast) << ","; + // std::cout << to_string_exact(host_results.alpha_slow) << ","; + // std::cout << to_string_exact(host_results.beta_y) << ","; + // std::cout << to_string_exact(host_results.beta_z) << ","; + // std::cout << to_string_exact(host_results.n_fs) << ","; + // std::cout << to_string_exact(host_results.sign) << ","; + // std::cout << to_string_exact(host_results.q_fast) << ","; + // std::cout << to_string_exact(host_results.q_slow) << ","; + // std::cout << to_string_exact(host_results.a_fast) << ","; + // std::cout << to_string_exact(host_results.a_slow) << ","; + // std::cout << to_string_exact(host_results.q_prime_fast) << ","; + // std::cout << to_string_exact(host_results.q_prime_slow) << ","; + // std::cout << to_string_exact(host_results.a_prime_fast) << ","; + // std::cout << to_string_exact(host_results.a_prime_slow) << "," << std::endl; + // Check results + reconstruction::EigenVecs const fiducial_results{ + 12.466068627219666, 1.3894122191714398, 155.40286701855041, 1.9304663147829049, 0.20425471836256681, + 
0.97891777490585408, 0.65850460786851805, 0.75257669470687782, 0.059999999999999984, 1, + 2.546253336541183, 1.3601203180183106, 0.58963258314939582, 2.825892204282022, 0.15277520019247093, + 0.081607219081098623, 0.03537795498896374, 0.1695535322569213}; + testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_fast, host_results.magnetosonic_speed_fast, + "magnetosonic_speed_fast"); + testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_slow, host_results.magnetosonic_speed_slow, + "magnetosonic_speed_slow"); + testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_fast_squared, + host_results.magnetosonic_speed_fast_squared, "magnetosonic_speed_fast_squared"); + testing_utilities::Check_Results(fiducial_results.magnetosonic_speed_slow_squared, + host_results.magnetosonic_speed_slow_squared, "magnetosonic_speed_slow_squared"); + testing_utilities::Check_Results(fiducial_results.alpha_fast, host_results.alpha_fast, "alpha_fast"); + testing_utilities::Check_Results(fiducial_results.alpha_slow, host_results.alpha_slow, "alpha_slow"); + testing_utilities::Check_Results(fiducial_results.beta_y, host_results.beta_y, "beta_y"); + testing_utilities::Check_Results(fiducial_results.beta_z, host_results.beta_z, "beta_z"); + testing_utilities::Check_Results(fiducial_results.n_fs, host_results.n_fs, "n_fs"); + testing_utilities::Check_Results(fiducial_results.sign, host_results.sign, "sign"); + testing_utilities::Check_Results(fiducial_results.q_fast, host_results.q_fast, "q_fast"); + testing_utilities::Check_Results(fiducial_results.q_slow, host_results.q_slow, "q_slow"); + testing_utilities::Check_Results(fiducial_results.a_fast, host_results.a_fast, "a_fast"); + testing_utilities::Check_Results(fiducial_results.a_slow, host_results.a_slow, "a_slow"); + testing_utilities::Check_Results(fiducial_results.q_prime_fast, host_results.q_prime_fast, "q_prime_fast"); + testing_utilities::Check_Results(fiducial_results.q_prime_slow, 
host_results.q_prime_slow, "q_prime_slow"); + testing_utilities::Check_Results(fiducial_results.a_prime_fast, host_results.a_prime_fast, "a_prime_fast"); + testing_utilities::Check_Results(fiducial_results.a_prime_slow, host_results.a_prime_slow, "a_prime_slow"); +} +#endif // MHD + +TEST(tALLReconstructionThreadGuard, CorrectInputExpectCorrectOutput) +{ + // Test parameters + int const order = 3; + int const nx = 6; + int const ny = 6; + int const nz = 6; + + // fiducial data + std::vector fiducial_vals(nx * ny * nz, 1); + fiducial_vals.at(86) = 0; + + // loop through all values of the indices and check them + for (int xid = 0; xid < nx; xid++) { + for (int yid = 0; yid < ny; yid++) { + for (int zid = 0; zid < nz; zid++) { + // Get the test value + bool test_val = reconstruction::Thread_Guard(nx, ny, nz, xid, yid, zid); + + // Compare + int id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + ASSERT_EQ(test_val, fiducial_vals.at(id)) + << "Test value not equal to fiducial value at id = " << id << std::endl; + } + } + } +} + +TEST(tALLReconstructionLoadData, CorrectInputExpectCorrectOutput) +{ + // Set up test and mock up grid + size_t const nx = 3, ny = 3, nz = 3; + size_t const n_cells = nx * ny * nz; + size_t const xid = 1, yid = 1, zid = 1; + size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z; + Real const gamma = 5. 
/ 3.; + + std::vector conserved(n_cells * grid_enum::num_fields); + std::iota(conserved.begin(), conserved.end(), 0.0); + + // Up the energy part of the grid to avoid negative pressure + for (size_t i = grid_enum::Energy * n_cells; i < (grid_enum::Energy + 1) * n_cells; i++) { + conserved.at(i) *= 5.0E2; + } + + // Get test data + auto const test_data = reconstruction::Load_Data(conserved.data(), xid, yid, zid, nx, ny, n_cells, o1, o2, o3, gamma); + +// Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{ + 13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, 9662.3910256410272, 147.5, 173.5, 197.5}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_data.magnetic_x, test_data.magnetic_x, "magnetic_x"); + testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive fiducial_data{13, 3.0769230769230771, 5.1538461538461542, 7.2307692307692308, + 39950.641025641031}; + #ifdef DE + fiducial_data.pressure = 39950.641025641031; + #endif // DE + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, 
"velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionComputeSlope, CorrectInputExpectCorrectOutput) +{ +// Setup input data +#ifdef MHD + reconstruction::Primitive left{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive right{1, 2, 3, 4, 5, 6, 7, 8}; +#else // MHD + reconstruction::Primitive left{6, 7, 8, 9, 10}; + reconstruction::Primitive right{1, 2, 3, 4, 5}; +#endif // MHD + Real const coef = 0.5; + + // Get test data + auto test_data = reconstruction::Compute_Slope(left, right, coef); + + // Check results +#ifdef MHD + Real const fiducial_data = -2.5; + testing_utilities::Check_Results(fiducial_data, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data, test_data.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_data, test_data.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_data, test_data.magnetic_z, "magnetic_z"); +#else // MHD + Real const fiducial_data = -2.5; + testing_utilities::Check_Results(fiducial_data, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionVanLeerSlope, CorrectInputExpectCorrectOutput) +{ +// Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // 
MHD + reconstruction::Primitive left{1, 2, 3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + + // Get test data + auto test_data = reconstruction::Van_Leer_Slope(left, right); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, + 5.5384615384615383, 6.666666666666667, 0, + 8.8421052631578956, 9.9047619047619051}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{1.7142857142857142, 3.1111111111111112, 4.3636363636363633, + 5.5384615384615383, 6.666666666666667}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +__global__ void Test_Monotize_Characteristic_Return_Primitive( + reconstruction::Primitive const primitive, reconstruction::Primitive const del_L, + reconstruction::Primitive const del_R, reconstruction::Primitive const del_C, 
reconstruction::Primitive const del_G, + reconstruction::Characteristic const del_a_L, reconstruction::Characteristic const del_a_R, + reconstruction::Characteristic const del_a_C, reconstruction::Characteristic const del_a_G, + reconstruction::EigenVecs const eigenvectors, Real const sound_speed, Real const sound_speed_squared, + Real const gamma, reconstruction::Primitive *monotonized_slope) +{ + *monotonized_slope = reconstruction::Monotonize_Characteristic_Return_Primitive( + primitive, del_L, del_R, del_C, del_G, del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, + sound_speed_squared, gamma); +} + +TEST(tALLReconstructionMonotonizeCharacteristicReturnPrimitive, CorrectInputExpectCorrectOutput) +{ +#ifdef MHD + reconstruction::Primitive const primitive{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive const del_L{9, 10, 11, 12, 13, 14, 15, 16}; + reconstruction::Primitive const del_R{17, 18, 19, 20, 21, 22, 23, 24}; + reconstruction::Primitive const del_C{25, 26, 27, 28, 29, 30, 31, 32}; + reconstruction::Primitive const del_G{33, 34, 35, 36, 37, 38, 39, 40}; + reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45, 46, 47}; + reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52, 53, 54}; + reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59, 60, 61}; + reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67, 68, 69}; +#else // MHD + reconstruction::Primitive const primitive{1, 2, 3, 4, 5}; + reconstruction::Primitive const del_L{9, 10, 11, 12, 13}; + reconstruction::Primitive const del_R{17, 18, 19, 20, 21}; + reconstruction::Primitive const del_C{25, 26, 27, 28, 29}; + reconstruction::Primitive const del_G{33, 34, 35, 36, 37}; + reconstruction::Characteristic const del_a_L{41, 42, 43, 44, 45}; + reconstruction::Characteristic const del_a_R{48, 49, 50, 51, 52}; + reconstruction::Characteristic const del_a_C{55, 56, 57, 58, 59}; + reconstruction::Characteristic const del_a_G{62, 64, 65, 66, 67}; +#endif // 
MHD + Real const sound_speed = 17.0, sound_speed_squared = sound_speed * sound_speed; + Real const gamma = 5. / 3.; + reconstruction::EigenVecs const eigenvectors{ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + }; + + // Get test data + cuda_utilities::DeviceVector dev_results(1); + hipLaunchKernelGGL(Test_Monotize_Characteristic_Return_Primitive, 1, 1, 0, 0, primitive, del_L, del_R, del_C, del_G, + del_a_L, del_a_R, del_a_C, del_a_G, eigenvectors, sound_speed, sound_speed_squared, gamma, + dev_results.data()); + GPU_Error_Check(); + cudaDeviceSynchronize(); + reconstruction::Primitive const host_results = dev_results.at(0); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{5046, 2934, -2526, -2828, 1441532, 0.0, -69716, 72152}; + testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_data.magnetic_y, host_results.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_data.magnetic_z, host_results.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{170, 68, 57, 58, 32946}; + testing_utilities::Check_Results(fiducial_data.density, host_results.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, host_results.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, host_results.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, host_results.velocity_z, "velocity_z"); + 
testing_utilities::Check_Results(fiducial_data.pressure, host_results.pressure, "pressure"); +#endif // MHD +} + +TEST(tHYDROReconstructionMonotizeParabolicInterface, CorrectInputExpectCorrectOutput) +{ + // Input Data + + reconstruction::Primitive const cell_i{1.4708046701, 9.5021020181, 3.7123503442, 4.6476103466, 3.7096802847}; + reconstruction::Primitive const cell_im1{3.9547588941, 3.1552319951, 3.0209247624, 9.5841013261, 2.2945188332}; + reconstruction::Primitive const cell_ip1{5.1973323534, 6.9132613767, 1.8397298636, 5.341960387, 9.093498542}; + reconstruction::Primitive interface_L_iph{6.7787324804, 9.5389820358, 9.8522754567, 7.8305142852, 2.450533435}; + reconstruction::Primitive interface_R_imh{4.8015193892, 5.9124263972, 8.7513040382, 8.3659359773, 1.339777121}; + + // Get test data + reconstruction::Monotonize_Parabolic_Interface(cell_i, cell_im1, cell_ip1, interface_L_iph, interface_R_imh); + + // Check results + reconstruction::Primitive const fiducial_interface_L{1.4708046700999999, 9.5021020181000004, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001}; + reconstruction::Primitive const fiducial_interface_R{1.4708046700999999, 9.428341982700001, 3.7123503441999999, + 4.6476103465999996, 3.7096802847000001}; + testing_utilities::Check_Results(fiducial_interface_L.density, interface_L_iph.density, "density"); + testing_utilities::Check_Results(fiducial_interface_L.velocity_x, interface_L_iph.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_interface_L.velocity_y, interface_L_iph.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_interface_L.velocity_z, interface_L_iph.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_interface_L.pressure, interface_L_iph.pressure, "pressure"); + + testing_utilities::Check_Results(fiducial_interface_R.density, interface_R_imh.density, "density"); + testing_utilities::Check_Results(fiducial_interface_R.velocity_x, interface_R_imh.velocity_x, 
"velocity_x"); + testing_utilities::Check_Results(fiducial_interface_R.velocity_y, interface_R_imh.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_interface_R.velocity_z, interface_R_imh.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_interface_R.pressure, interface_R_imh.pressure, "pressure"); +} + +TEST(tALLReconstructionCalcInterfaceLinear, CorrectInputExpectCorrectOutput) +{ + // Setup input data +#ifdef MHD + reconstruction::Primitive left{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive right{6, 7, 8, 9, 10, 11, 12, 13}; +#else // MHD + reconstruction::Primitive left{1, 2, 3, 4, 5}; + reconstruction::Primitive right{6, 7, 8, 9, 10}; +#endif // MHD + Real const coef = 0.5; + + // Get test data + auto test_data = reconstruction::Calc_Interface_Linear(left, right, coef); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5, 0, 10, 11.25}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{2.5, 3.75, 5, 6.25, 7.5}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); 
+ testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionCalcInterfaceParabolic, CorrectInputExpectCorrectOutput) +{ + // Setup input data +#ifdef MHD + reconstruction::Primitive cell_i{1, 2, 3, 4, 5, 6, 7, 8}; + reconstruction::Primitive cell_im1{6, 7, 8, 9, 10, 11, 12, 13}; + reconstruction::Primitive slopes_i{14, 15, 16, 17, 18, 19, 20, 21}; + reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26, 27, 28, 29}; +#else // MHD + reconstruction::Primitive cell_i{1, 2, 3, 4, 5}; + reconstruction::Primitive cell_im1{6, 7, 8, 9, 10}; + reconstruction::Primitive slopes_i{14, 15, 16, 17, 18}; + reconstruction::Primitive slopes_im1{22, 23, 24, 25, 26}; +#endif // MHD + + // Get test data + auto test_data = reconstruction::Calc_Interface_Parabolic(cell_i, cell_im1, slopes_i, slopes_im1); + + // Check results +#ifdef MHD + reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, + 7.833333333333333, 8.8333333333333339, 0.0, + 10.833333333333334, 11.833333333333334}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); + testing_utilities::Check_Results(fiducial_data.magnetic_y, test_data.magnetic_y, "magnetic_y"); + testing_utilities::Check_Results(fiducial_data.magnetic_z, test_data.magnetic_z, "magnetic_z"); +#else // MHD + reconstruction::Primitive const fiducial_data{4.833333333333333, 5.833333333333333, 6.833333333333333, + 
7.833333333333333, 8.8333333333333339}; + testing_utilities::Check_Results(fiducial_data.density, test_data.density, "density"); + testing_utilities::Check_Results(fiducial_data.velocity_x, test_data.velocity_x, "velocity_x"); + testing_utilities::Check_Results(fiducial_data.velocity_y, test_data.velocity_y, "velocity_y"); + testing_utilities::Check_Results(fiducial_data.velocity_z, test_data.velocity_z, "velocity_z"); + testing_utilities::Check_Results(fiducial_data.pressure, test_data.pressure, "pressure"); +#endif // MHD +} + +TEST(tALLReconstructionPPMSingleVariable, CorrectInputExpectCorrectOutput) +{ + // Set up PRNG to use + std::mt19937_64 prng(42); + std::uniform_real_distribution doubleRand(-100, 100); + + // Set up testing parameters + size_t const n_tests = 100; + std::vector input_data(n_tests * 5); + for (double &val : input_data) { + val = doubleRand(prng); + } + + std::vector fiducial_left_interface{ + 50.429040149605328, -40.625142952817804, 37.054257344499717, -55.796322960572695, -14.949021655598202, + -10.760611497035882, 71.107183338735751, -29.453314279116661, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 9.3792919223901166, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 96.420244795844823, 37.498528618937456, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -12.674113472365306, 30.421304081280084, + 43.700175645941769, 58.342347077360131, -31.574197692184548, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -94.449608348937488, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -60.629868182203786, -41.343090531127039, -75.449850543801574, -82.52313028208863, + 19.871484181185011, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, 
-27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, + -50.671539065300863, 13.424189957034622, 80.237684918029572, 32.454734198410179, 66.84741286999801, + 24.53669768915492, -67.195147776790975, 72.277527112459907, -46.094192444366435, -99.915875366345205, + 32.244024128018054, -95.648868731550635, 17.922876720365402, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 23.62791013796798, 59.620571575902432, 41.0983082454959, + -30.533954819557593, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + -49.167117951549066, 78.440749922366507, 51.390453104722326, 3.1993391287610393, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147, + }; + std::vector fiducial_right_interface{ + 50.429040149605328, 4.4043935241855703, 37.054257344499717, 23.707343328192596, -14.949021655598202, + -10.760611497035882, 8.367260859616664, 8.5357943668839624, 7.38606168778702, -23.210826670297152, + -85.15197822983292, 18.98804944849401, 64.754272117396766, 4.5584678980835918, 45.81912726561103, + 58.769584663215738, 47.626531326553447, 23.370742401854159, 47.06767164062336, -53.975231802858218, + -81.51278133300454, -74.554960772880221, 75.572387546643355, 61.339053128914685, -41.370881014041672, + -41.817524439980467, 58.391560533135817, -85.991024651293131, -36.626332669233776, 30.421304081280084, + 20.637382412674096, 58.342347077360131, -79.757902483702381, 98.151410701129635, -9.4994975790183389, + -87.49117921577357, -39.384192078363533, 79.849643090061676, 93.096197902468759, -64.374502025066192, + 82.037247010307937, -20.951323678824952, 46.927431599533087, -75.449850543801574, -54.603894223278004, + 
-59.419110050353098, -22.253989777496159, 86.943333900988137, -83.887344220269938, 73.270857190511975, + 84.784625452008811, -27.929776508530765, -9.6992610428405612, -65.233676045197072, -88.498474065470134, + 47.637114710282589, -69.50911815749248, -69.848254012650372, -7.4520009269431711, 90.887158278825865, + -79.086012597191512, -45.713537271527976, 80.237684918029572, -60.666381661910016, 68.727158732184449, + 24.53669768915492, -67.195147776790975, 72.610434112023597, 54.910597945673814, -19.862686571231023, + 32.244024128018054, -95.648868731550635, -34.761757909478987, -86.334093878928797, -16.580223524066724, + 39.48244113577249, 64.203567686297504, 0.77846541072490538, 59.620571575902432, 41.0983082454959, + -2.6491435658297036, -23.149979553301478, -54.098849622102691, -45.577469823900444, 33.284499908516068, + -39.186662569988762, 76.266375356625161, -51.650172854435624, -68.894636301310584, 98.410134045837452, + 30.9954824410611, 78.440749922366507, 51.390453104722326, 70.625792807373429, 43.749856317813453, + -81.399433434996496, 88.385686355761862, 78.242223440453444, 27.539590130937498, -6.9781781598207147, + }; + + // Run n_tests iterations of the loop choosing random numbers to put into the interface state computation and checking + // the results + for (size_t i = 0; i < n_tests; i++) { + // Run the function + double test_left_interface, test_right_interface; + size_t const idx = 5 * i; + reconstruction::PPM_Single_Variable(input_data[idx], input_data[idx + 1], input_data[idx + 2], input_data[idx + 3], + input_data[idx + 4], test_left_interface, test_right_interface); + + // Compare results + testing_utilities::Check_Results(fiducial_left_interface.at(i), test_left_interface, "left i+1/2 interface"); + testing_utilities::Check_Results(fiducial_right_interface.at(i), test_right_interface, "right i-1/2 interface"); + } +} + +TEST(tALLReconstructionWriteData, CorrectInputExpectCorrectOutput) +{ + // Set up test and mock up grid +#ifdef MHD + 
reconstruction::Primitive interface{1, 2, 3, 4, 5, 6, 7, 8}; +#else // MHD + reconstruction::Primitive interface{6, 7, 8, 9, 10}; +#endif // MHD + size_t const nx = 3, ny = 3, nz = 3; + size_t const n_cells = nx * ny * nz; + size_t const xid = 1, yid = 1, zid = 1; + size_t const id = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); + size_t const o1 = grid_enum::momentum_x, o2 = grid_enum::momentum_y, o3 = grid_enum::momentum_z; + Real const gamma = 5. / 3.; + + std::vector conserved(n_cells * grid_enum::num_fields); + std::vector interface_arr(n_cells * grid_enum::num_fields); + + // Get test data + reconstruction::Write_Data(interface, interface_arr.data(), conserved.data(), id, n_cells, o1, o2, o3, gamma); + +// Fiducial Data +#ifdef MHD + std::unordered_map fiducial_interface = {{13, 1}, {40, 2}, {67, 3}, {94, 4}, + {121, 78.5}, {148, 7}, {175, 8}}; +#else // MHD + std::unordered_map fiducial_interface = {{13, 6}, {40, 42}, {67, 48}, {94, 54}, {121, 597}}; +#endif // MHD + + // Perform Comparison + for (size_t i = 0; i < interface_arr.size(); i++) { + // Check the interface + double test_val = interface_arr.at(i); + double fiducial_val = (fiducial_interface.find(i) == fiducial_interface.end()) ? 
0.0 : fiducial_interface[i]; + + testing_utilities::Check_Results(fiducial_val, test_val, "Interface at i=" + std::to_string(i)); + } +} + +TEST(tHYDROReconstructionPlmLimitInterfaces, CorrectInputExpectCorrectOutput) +{ + // Set up values to test + reconstruction::Primitive interface_l_iph, interface_r_imh; + reconstruction::Primitive cell_im1, cell_i, cell_ip1; + interface_r_imh.density = -1.94432878387898625e+14; + interface_r_imh.velocity_x = 1.42049955114756404e-04; + interface_r_imh.velocity_y = -2.61311412306644180e-06; + interface_r_imh.velocity_z = -1.99429361865204601e-07; + interface_r_imh.pressure = -2.01130121665840250e-14; + interface_l_iph.density = 1.94433200621991188e+14; + interface_l_iph.velocity_x = 1.42025407335853601e-04; + interface_l_iph.velocity_y = -2.61311412306644180e-06; + interface_l_iph.velocity_z = -6.01154878659959398e-06; + interface_l_iph.pressure = 2.01130321665840277e-14; + + cell_im1.density = 1.61101072114153951e+08; + cell_i.density = 1.61117046279133737e+08; + cell_ip1.density = 1.61011252191243321e+08; + cell_im1.velocity_x = 1.42067642369120116e-04; + cell_i.velocity_x = 1.42037681225305003e-04; + cell_ip1.velocity_x = 1.41901817571928041e-04; + cell_im1.velocity_y = -2.61228250783092252e-06; + cell_i.velocity_y = -2.61311412306644180e-06; + cell_ip1.velocity_y = -2.61155204131260820e-06; + cell_im1.velocity_z = 2.71420653365757378e-06; + cell_i.velocity_z = -3.10548907423239929e-06; + cell_ip1.velocity_z = -8.91005201578514336e-06; + cell_im1.pressure = 9.99999999999999945e-21; + cell_i.pressure = 9.99999999999999945e-21; + cell_ip1.pressure = 4.70262856027679407e-03; + + // Set fiducial values + reconstruction::Primitive interface_r_imh_fiducial, interface_l_iph_fiducial; + interface_r_imh_fiducial.density = 161101072.11415395; + interface_r_imh_fiducial.velocity_x = 1.42049955114756404e-04; + interface_r_imh_fiducial.velocity_y = -2.61311412306644180e-06; + interface_r_imh_fiducial.velocity_z = -1.99429361865204601e-07; 
+ interface_r_imh_fiducial.pressure = 9.99999999999999945e-21; + interface_l_iph_fiducial.density = 1.61117046279133737e+08; + interface_l_iph_fiducial.velocity_x = 1.42025407335853601e-04; + interface_l_iph_fiducial.velocity_y = -2.61311412306644180e-06; + interface_l_iph_fiducial.velocity_z = -6.01154878659959398e-06; + interface_l_iph_fiducial.pressure = 2.0113032166584028e-14; + + // Run function + reconstruction::Plm_Limit_Interfaces(interface_l_iph, interface_r_imh, cell_im1, cell_i, cell_ip1); + + // Check values + testing_utilities::Check_Results(interface_l_iph_fiducial.density, interface_l_iph.density, + "Mismatch in l_iph density"); + testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_x, interface_l_iph.velocity_x, + "Mismatch in l_iph velocity_x"); + testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_y, interface_l_iph.velocity_y, + "Mismatch in l_iph velocity_y"); + testing_utilities::Check_Results(interface_l_iph_fiducial.velocity_z, interface_l_iph.velocity_z, + "Mismatch in l_iph velocity_z"); + testing_utilities::Check_Results(interface_l_iph_fiducial.pressure, interface_l_iph.pressure, + "Mismatch in l_iph pressure"); + testing_utilities::Check_Results(interface_r_imh_fiducial.density, interface_r_imh.density, + "Mismatch in r_imh density"); + testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_x, interface_r_imh.velocity_x, + "Mismatch in r_imh velocity_x"); + testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_y, interface_r_imh.velocity_y, + "Mismatch in r_imh velocity_y"); + testing_utilities::Check_Results(interface_r_imh_fiducial.velocity_z, interface_r_imh.velocity_z, + "Mismatch in r_imh velocity_z"); + testing_utilities::Check_Results(interface_r_imh_fiducial.pressure, interface_r_imh.pressure, + "Mismatch in r_imh pressure"); +} diff --git a/src/riemann_solvers/exact_cuda.cu b/src/riemann_solvers/exact_cuda.cu index d84464828..9e0a4cff2 100644 --- 
a/src/riemann_solvers/exact_cuda.cu +++ b/src/riemann_solvers/exact_cuda.cu @@ -1,114 +1,119 @@ /*! \file exact_cuda.cu * \brief Function definitions for the cuda exact Riemann solver.*/ -#ifdef CUDA - -#include "../utils/gpu.hpp" #include #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../riemann_solvers/exact_cuda.h" +#include "../utils/gpu.hpp" -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" #endif - - -/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief Exact Riemann solver based on the Fortran code given in Sec. 4.9 of Toro (1999). */ -__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief Exact Riemann solver based on the Fortran code given in + * Sec. 4.9 of Toro (1999). 
*/ +__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; int o1, o2, o3; if (dir == 0) { - o1 = 1; o2 = 2; o3 = 3; + o1 = 1; + o2 = 2; + o3 = 3; } if (dir == 1) { - o1 = 2; o2 = 3; o3 = 1; + o1 = 2; + o2 = 3; + o3 = 1; } if (dir == 2) { - o1 = 3; o2 = 1; o3 = 2; + o1 = 3; + o2 = 1; + o3 = 2; } - Real dl, vxl, vyl, vzl, pl, cl; //density, velocity, pressure, sound speed (left) - Real dr, vxr, vyr, vzr, pr, cr; //density, velocity, pressure, sound speed (right) - Real ds, vs, ps, Es; //sample_CUDAd density, velocity, pressure, total energy - Real vm, pm; //velocity and pressure in the star region + Real dl, vxl, vyl, vzl, pl, + cl; // density, velocity, pressure, sound speed (left) + Real dr, vxr, vyr, vzr, pr, + cr; // density, velocity, pressure, sound speed (right) + Real ds, vs, ps, Es; // sample_CUDAd density, velocity, pressure, total + // energy + Real vm, pm; // velocity and pressure in the star region - #ifdef DE - Real gel, ger, E_kin, E, dge ; - #endif +#ifdef DE + Real gel, ger, E_kin, E, dge; +#endif - #ifdef SCALAR +#ifdef SCALAR Real scalarl[NSCALARS], scalarr[NSCALARS]; - #endif - +#endif // Each thread executes the solver independently - //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) - if (xid < nx && yid < ny && zid < nz) - { + // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) + if (xid < nx && yid < ny 
&& zid < nz) { // retrieve primitive variables - dl = dev_bounds_L[ tid]; - vxl = dev_bounds_L[o1*n_cells + tid]/dl; - vyl = dev_bounds_L[o2*n_cells + tid]/dl; - vzl = dev_bounds_L[o3*n_cells + tid]/dl; - #ifdef DE //PRESSURE_DE - E = dev_bounds_L[4*n_cells + tid]; - E_kin = 0.5 * dl * ( vxl*vxl + vyl*vyl + vzl*vzl ); - dge = dev_bounds_L[(n_fields-1)*n_cells + tid]; - pl = hydro_utilities::Get_Pressure_From_DE( E, E - E_kin, dge, gamma ); - #else - pl = (dev_bounds_L[4*n_cells + tid] - 0.5*dl*(vxl*vxl + vyl*vyl + vzl*vzl)) * (gamma - 1.0); - #endif //PRESSURE_DE - pl = fmax(pl, (Real) TINY_NUMBER); - #ifdef SCALAR - for (int i=0; i= 0) - { - dev_flux[o2*n_cells + tid] = ds*vs*vyl; - dev_flux[o3*n_cells + tid] = ds*vs*vzl; - #ifdef SCALAR - for (int i=0; i= 0) { + dev_flux[o2 * n_cells + tid] = ds * vs * vyl; + dev_flux[o3 * n_cells + tid] = ds * vs * vzl; +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[(5 + i) * n_cells + tid] = ds * vs * scalarl[i]; } - #endif - #ifdef DE - dev_flux[(n_fields-1)*n_cells + tid] = ds*vs*gel; - #endif - Es = (ps/(gamma - 1.0)) + 0.5*ds*(vs*vs + vyl*vyl + vzl*vzl); - } - else - { - dev_flux[o2*n_cells + tid] = ds*vs*vyr; - dev_flux[o3*n_cells + tid] = ds*vs*vzr; - #ifdef SCALAR - for (int i=0; i nriter) { - //printf("Divergence in Newton-Raphson iteration. p = %e\n", *p); + // printf("Divergence in Newton-Raphson iteration. p = %e\n", *p); } // compute velocity in star region - *v = 0.5*(vxl + vxr + fr - fl); - + *v = 0.5 * (vxl + vxr + fr - fl); } - -__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, - Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma) +__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl, + Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma) { // purpose: to sample the solution throughout the wave // pattern. 
Pressure pm and velocity vm in the @@ -251,96 +254,81 @@ __device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real Real c, sl, sr; - if (vm >= 0) // sampling point lies to the left of the contact discontinuity + if (vm >= 0) // sampling point lies to the left of the contact discontinuity { - if (pm <= pl) // left rarefaction + if (pm <= pl) // left rarefaction { - if (vxl - cl >= 0) // sampled point is in left data state + if (vxl - cl >= 0) // sampled point is in left data state { *d = dl; *v = vxl; *p = pl; - } - else - { - if (vm - cl*powf(pm/pl, (gamma - 1.0)/(2.0 * gamma)) < 0) // sampled point is in star left state + } else { + if (vm - cl * powf(pm / pl, (gamma - 1.0) / (2.0 * gamma)) < 0) // sampled point is in star left state { - *d = dl*powf(pm/pl, 1.0/gamma); + *d = dl * powf(pm / pl, 1.0 / gamma); *v = vm; *p = pm; - } - else // sampled point is inside left fan + } else // sampled point is inside left fan { - c = (2.0 / (gamma + 1.0))*(cl + ((gamma - 1.0) / 2.0)*vxl); + c = (2.0 / (gamma + 1.0)) * (cl + ((gamma - 1.0) / 2.0) * vxl); *v = c; - *d = dl*powf(c/cl, 2.0 / (gamma - 1.0)); - *p = pl*powf(c/cl, 2.0 * gamma / (gamma - 1.0)); + *d = dl * powf(c / cl, 2.0 / (gamma - 1.0)); + *p = pl * powf(c / cl, 2.0 * gamma / (gamma - 1.0)); } } - } - else // left shock + } else // left shock { - sl = vxl - cl*sqrt(((gamma + 1.0)/(2.0 * gamma))*(pm/pl) + ((gamma - 1.0)/(2.0 * gamma))); - if (sl >= 0) // sampled point is in left data state + sl = vxl - cl * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pl) + ((gamma - 1.0) / (2.0 * gamma))); + if (sl >= 0) // sampled point is in left data state { *d = dl; *v = vxl; *p = pl; - } - else // sampled point is in star left state + } else // sampled point is in star left state { - *d = dl*(pm/pl + ((gamma - 1.0) / (gamma + 1.0)))/((pm/pl)*((gamma - 1.0) / (gamma + 1.0)) + 1.0); + *d = dl * (pm / pl + ((gamma - 1.0) / (gamma + 1.0))) / ((pm / pl) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); *v = vm; *p 
= pm; } } - } - else // sampling point lies to the right of the contact discontinuity + } else // sampling point lies to the right of the contact discontinuity { - if (pm > pr) // right shock + if (pm > pr) // right shock { - sr = vxr + cr*sqrt(((gamma + 1.0)/(2.0 * gamma))*(pm/pr) + ((gamma - 1.0)/(2.0 * gamma))); - if (sr <= 0) // sampled point is in right data state + sr = vxr + cr * sqrt(((gamma + 1.0) / (2.0 * gamma)) * (pm / pr) + ((gamma - 1.0) / (2.0 * gamma))); + if (sr <= 0) // sampled point is in right data state { *d = dr; *v = vxr; *p = pr; - } - else // sampled point is in star right state + } else // sampled point is in star right state { - *d = dr*(pm/pr + ((gamma - 1.0) / (gamma + 1.0)))/((pm/pr)*((gamma - 1.0) / (gamma + 1.0)) + 1.0); + *d = dr * (pm / pr + ((gamma - 1.0) / (gamma + 1.0))) / ((pm / pr) * ((gamma - 1.0) / (gamma + 1.0)) + 1.0); *v = vm; *p = pm; } - } - else // right rarefaction + } else // right rarefaction { - if (vxr + cr <= 0) // sampled point is in right data state + if (vxr + cr <= 0) // sampled point is in right data state { *d = dr; *v = vxr; *p = pr; - } - else - { - if (vm + cr*powf(pm/pr, (gamma - 1.0)/(2.0 * gamma)) >= 0) // sampled point is in star right state + } else { + if (vm + cr * powf(pm / pr, (gamma - 1.0) / (2.0 * gamma)) >= 0) // sampled point is in star right state { - *d = dr*powf(pm/pr, (1.0/gamma)); + *d = dr * powf(pm / pr, (1.0 / gamma)); *v = vm; *p = pm; - } - else // sampled point is inside right fan + } else // sampled point is inside right fan { - c = (2.0 / (gamma + 1.0))*(cr - ((gamma - 1.0) / 2.0)*vxr); + c = (2.0 / (gamma + 1.0)) * (cr - ((gamma - 1.0) / 2.0) * vxr); *v = -c; - *d = dr*powf(c/cr, 2.0 / (gamma - 1.0)); - *p = pr*powf(c/cr, 2.0 * gamma / (gamma - 1.0)); + *d = dr * powf(c / cr, 2.0 / (gamma - 1.0)); + *p = pr * powf(c / cr, 2.0 * gamma / (gamma - 1.0)); } } } } } - - - -#endif //CUDA diff --git a/src/riemann_solvers/exact_cuda.h b/src/riemann_solvers/exact_cuda.h index 
4d6d1f3d6..4cb004fb5 100644 --- a/src/riemann_solvers/exact_cuda.h +++ b/src/riemann_solvers/exact_cuda.h @@ -1,27 +1,27 @@ /*! \file exact_cuda.h - * \brief Declarations of functions for the cuda exact riemann solver kernel. */ - -#ifdef CUDA + * \brief Declarations of functions for the cuda exact riemann solver kernel. + */ #ifndef EXACT_CUDA_H #define EXACT_CUDA_H #include "../global/global.h" - -/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief Exact Riemann solver based on the Fortran code given in Sec. 4.9 of Toro (1999). */ -__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); +/*! \fn Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief Exact Riemann solver based on the Fortran code given in + * Sec. 4.9 of Toro (1999). 
*/ +__global__ void Calculate_Exact_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields); __device__ Real guessp_CUDA(Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); __device__ void prefun_CUDA(Real *f, Real *fd, Real p, Real dk, Real pk, Real ck, Real gamma); -__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); - -__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, - Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); +__device__ void starpv_CUDA(Real *p, Real *v, Real dl, Real vxl, Real pl, Real cl, Real dr, Real vxr, Real pr, Real cr, + Real gamma); +__device__ void sample_CUDA(const Real pm, const Real vm, Real *d, Real *v, Real *p, Real dl, Real vxl, Real pl, + Real cl, Real dr, Real vxr, Real pr, Real cr, Real gamma); -#endif //EXACT_CUDA_H -#endif //CUDA +#endif // EXACT_CUDA_H diff --git a/src/riemann_solvers/hll_cuda.cu b/src/riemann_solvers/hll_cuda.cu index a69cf9d0f..2987771b2 100644 --- a/src/riemann_solvers/hll_cuda.cu +++ b/src/riemann_solvers/hll_cuda.cu @@ -1,31 +1,32 @@ /*! \file hllc_cuda.cu * \brief Function definitions for the cuda HLLC Riemann solver.*/ -#ifdef CUDA - -#include "../utils/gpu.hpp" #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../riemann_solvers/hll_cuda.h" +#include "../utils/gpu.hpp" -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" #endif - -/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. 
*/ -__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief HLLC Riemann solver based on the version described in Toro + * (2006), Sec. 10.4. */ +__global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El; Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er; @@ -40,70 +41,75 @@ __global__ void Calculate_HLL_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R // Real dls, drs, mxls, mxrs, myls, myrs, mzls, mzrs, Els, Ers; Real f_d, f_mx, f_my, f_mz, f_E; Real Sl, Sr, cfl, cfr; - #ifdef DE +#ifdef DE Real dgel, dger, f_ge_l, f_ge_r, f_ge, E_kin; - #endif - #ifdef SCALAR +#endif +#ifdef SCALAR Real dscl[NSCALARS], dscr[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; - #endif +#endif // Real etah = 0; int o1, o2, o3; - if (dir==0) { - o1 = 1; o2 = 2; o3 = 3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir==1) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir==2) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // Each thread executes the solver 
independently - //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) - if (xid < nx && yid < ny && zid < nz) - { + // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) + if (xid < nx && yid < ny && zid < nz) { // retrieve conserved variables - dl = dev_bounds_L[ tid]; - mxl = dev_bounds_L[o1*n_cells + tid]; - myl = dev_bounds_L[o2*n_cells + tid]; - mzl = dev_bounds_L[o3*n_cells + tid]; - El = dev_bounds_L[4*n_cells + tid]; - #ifdef SCALAR - for (int i=0; i 0.0) { - dev_flux[ tid] = f_d_l; - dev_flux[o1*n_cells+tid] = f_mx_l; - dev_flux[o2*n_cells+tid] = f_my_l; - dev_flux[o3*n_cells+tid] = f_mz_l; - dev_flux[4*n_cells+tid] = f_E_l; - #ifdef SCALAR - for (int i=0; i + #include "../global/global.h" #include "../global/global_cuda.h" #include "../riemann_solvers/hllc_cuda.h" +#include "../utils/gpu.hpp" -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" #endif - -/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) - * \brief HLLC Riemann solver based on the version described in Toro (2006), Sec. 10.4. */ -__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int + * n_fields) \brief HLLC Riemann solver based on the version described in Toro + * (2006), Sec. 10.4. 
*/ +__global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El; Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er; @@ -40,96 +41,102 @@ __global__ void Calculate_HLLC_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_ Real dls, drs, mxls, mxrs, myls, myrs, mzls, mzrs, Els, Ers; Real f_d, f_mx, f_my, f_mz, f_E; Real Sl, Sr, Sm, cfl, cfr, ps; - #ifdef DE +#ifdef DE Real dgel, dger, gel, ger, gels, gers, f_ge_l, f_ge_r, f_ge, E_kin; - #endif - #ifdef SCALAR - Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], f_sc_r[NSCALARS], f_sc[NSCALARS]; - #endif +#endif +#ifdef SCALAR + Real dscl[NSCALARS], dscr[NSCALARS], scl[NSCALARS], scr[NSCALARS], scls[NSCALARS], scrs[NSCALARS], f_sc_l[NSCALARS], + f_sc_r[NSCALARS], f_sc[NSCALARS]; +#endif Real etah = 0; int o1, o2, o3; - if (dir==0) { - o1 = 1; o2 = 2; o3 = 3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir==1) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir==2) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // Each thread executes the solver independently - //if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) - if (xid < nx && yid < ny && zid < nz) - { + // if (xid > n_ghost-3 && xid < nx-n_ghost+1 && yid < ny && zid < nz) 
+ if (xid < nx && yid < ny && zid < nz) { // retrieve conserved variables - dl = dev_bounds_L[ tid]; - mxl = dev_bounds_L[o1*n_cells + tid]; - myl = dev_bounds_L[o2*n_cells + tid]; - mzl = dev_bounds_L[o3*n_cells + tid]; - El = dev_bounds_L[4*n_cells + tid]; - #ifdef SCALAR - for (int i=0; i 0.0) { - dev_flux[ tid] = f_d_l; - dev_flux[o1*n_cells+tid] = f_mx_l; - dev_flux[o2*n_cells+tid] = f_my_l; - dev_flux[o3*n_cells+tid] = f_mz_l; - dev_flux[4*n_cells+tid] = f_E_l; - #ifdef SCALAR - for (int i=0; i -#include #include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global_cuda.h" +#include "../riemann_solvers/hllc_cuda.h" // Include code to test #include "../utils/gpu.hpp" #include "../utils/testing_utilities.h" -#include "../riemann_solvers/hllc_cuda.h" // Include code to test #if defined(CUDA) && defined(HLLC) - // ========================================================================= - /*! - * \brief Test fixture for simple testing of the HLLC Riemann Solver. - Effectively takes the left state, right state, fiducial fluxes, and - custom user output then performs all the required running and testing - * - */ - class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test - { - protected: - // ===================================================================== - /*! - * \brief Compute and return the HLLC fluxes - * - * \param[in] leftState The state on the left side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, and energy. - * \param[in] rightState The state on the right side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, and energy. 
- * \param[in] gamma The adiabatic index - * \return std::vector - */ - std::vector computeFluxes(std::vector const &stateLeft, - std::vector const &stateRight, - Real const &gamma) - { - // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction? - int const ny = 1; // Number of cells in the y-direction? - int const nz = 1; // Number of cells in the z-direction? - int const nGhost = 0; // Isn't actually used it appears - int const direction = 0; // Which direction, 0=x, 1=y, 2=z - int const nFields = 5; // Total number of conserved fields - - // Launch Parameters - dim3 const dimGrid (1,1,1); // How many blocks in the grid - dim3 const dimBlock(1,1,1); // How many threads per block - - // Create the std::vector to store the fluxes and declare the device - // pointers - std::vector testFlux(5); - Real *devConservedLeft; - Real *devConservedRight; - Real *devTestFlux; - - // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, nFields*sizeof(Real))); - - CudaSafeCall(cudaMemcpy(devConservedLeft, - stateLeft.data(), - nFields*sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, - stateRight.data(), - nFields*sizeof(Real), - cudaMemcpyHostToDevice)); - - // Run kernel - hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, - dimGrid, - dimBlock, - 0, - 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devTestFlux, - nx, - ny, - nz, - nGhost, - gamma, - direction, - nFields); - - CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), - devTestFlux, - nFields*sizeof(Real), - cudaMemcpyDeviceToHost)); - - // Make sure to sync with the device so we have the results - cudaDeviceSynchronize(); - CudaCheckError(); - - return testFlux; - } - // ===================================================================== - - 
// ===================================================================== - /*! - * \brief Check if the fluxes are correct - * - * \param[in] fiducialFlux The fiducial flux in conserved variables. In - * order the elements are: density, x-momentum, y-momentum, z-momentum, - * and energy. - * \param[in] testFlux The test flux in conserved variables. In order - * the elements are: density, x-momentum, y-momentum, z-momentum, and - * energy. - * \param[in] customOutput Any custom output the user would like to - * print. It will print after the default GTest output but before the - * values that failed are printed - */ - void checkResults(std::vector const &fiducialFlux, - std::vector const &testFlux, - std::string const &customOutput = "") - { - // Field names - std::vector const fieldNames {"Densities", - "X Momentum", - "Y Momentum", - "Z Momentum", - "Energies"}; - - ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) - and (fiducialFlux.size() == fieldNames.size())) - << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl - << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl - << "testFlux.size() = " << testFlux.size() << std::endl - << "fieldNames.size() = " << fieldNames.size() << std::endl; - - // Check for equality - for (size_t i = 0; i < fieldNames.size(); i++) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], - testFlux[i], - absoluteDiff, - ulpsDiff); - EXPECT_TRUE(areEqual) - << std::endl << customOutput << std::endl - << "There's a difference in " << fieldNames[i] << " Flux" << std::endl - << "The fiducial value is: " << fiducialFlux[i] << std::endl - << "The test value is: " << testFlux[i] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - } - // 
===================================================================== - - }; - // ========================================================================= - - // ========================================================================= - // Testing Calculate_HLLC_Fluxes_CUDA - /*! - * \brief Test the HLLC solver with the input from the high pressure side of a - sod shock tube. Correct results are hard coded into this test. Similar tests - do not need to be this verbose, simply passing values to the kernel call - should be sufficient in most cases - * - */ - TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name - HighPressureSideExpectCorrectOutput) // Test name - { - // Physical Values - Real const density = 1.0; - Real const pressure = 1.0; - Real const velocityX = 0.0; - Real const velocityY = 0.0; - Real const velocityZ = 0.0; - Real const momentumX = density * velocityX; - Real const momentumY = density * velocityY; - Real const momentumZ = density * velocityZ; - Real const gamma = 1.4; - Real const energy = (pressure/(gamma - 1)) + 0.5 * density - * (velocityX*velocityX - + velocityY*velocityY - + velocityZ*velocityZ); - - std::vector const state{density, - momentumX, - momentumY, - momentumZ, - energy}; - std::vector const fiducialFluxes{0, 1, 0, 0, 0}; - - // Compute the fluxes - std::vector const testFluxes = computeFluxes(state, // Left state - state, // Right state - gamma); // Adiabatic Index - - // Check for correctness - checkResults(fiducialFluxes, testFluxes); +// ========================================================================= +/*! + * \brief Test fixture for simple testing of the HLLC Riemann Solver. 
+ Effectively takes the left state, right state, fiducial fluxes, and + custom user output then performs all the required running and testing + * + */ +// NOLINTNEXTLINE(readability-identifier-naming) +class tHYDROCalculateHLLCFluxesCUDA : public ::testing::Test +{ + protected: + // ===================================================================== + /*! + * \brief Compute and return the HLLC fluxes + * + * \param[in] leftState The state on the left side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, and energy. + * \param[in] rightState The state on the right side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, and energy. + * \param[in] gamma The adiabatic index + * \return std::vector + */ + std::vector Compute_Fluxes(std::vector const &stateLeft, std::vector const &stateRight, + Real const &gamma) + { + // Simulation Paramters + int const nx = 1; // Number of cells in the x-direction? + int const ny = 1; // Number of cells in the y-direction? + int const nz = 1; // Number of cells in the z-direction? 
+ int const nGhost = 0; // Isn't actually used it appears + int const direction = 0; // Which direction, 0=x, 1=y, 2=z + int const nFields = 5; // Total number of conserved fields + + // Launch Parameters + dim3 const dimGrid(1, 1, 1); // How many blocks in the grid + dim3 const dimBlock(1, 1, 1); // How many threads per block + + // Create the std::vector to store the fluxes and declare the device + // pointers + std::vector testFlux(5); + Real *devConservedLeft; + Real *devConservedRight; + Real *devTestFlux; + + // Allocate device arrays and copy data + GPU_Error_Check(cudaMalloc(&devConservedLeft, nFields * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedRight, nFields * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devTestFlux, nFields * sizeof(Real))); + + GPU_Error_Check(cudaMemcpy(devConservedLeft, stateLeft.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check(cudaMemcpy(devConservedRight, stateRight.data(), nFields * sizeof(Real), cudaMemcpyHostToDevice)); + + // Run kernel + hipLaunchKernelGGL(Calculate_HLLC_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devTestFlux, nx, ny, nz, nGhost, gamma, direction, nFields); + + GPU_Error_Check(); + GPU_Error_Check(cudaMemcpy(testFlux.data(), devTestFlux, nFields * sizeof(Real), cudaMemcpyDeviceToHost)); + + // Make sure to sync with the device so we have the results + cudaDeviceSynchronize(); + GPU_Error_Check(); + + return testFlux; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Check if the fluxes are correct + * + * \param[in] fiducialFlux The fiducial flux in conserved variables. In + * order the elements are: density, x-momentum, y-momentum, z-momentum, + * and energy. + * \param[in] testFlux The test flux in conserved variables. 
In order + * the elements are: density, x-momentum, y-momentum, z-momentum, and + * energy. + * \param[in] customOutput Any custom output the user would like to + * print. It will print after the default GTest output but before the + * values that failed are printed + */ + void Check_Results(std::vector const &fiducialFlux, std::vector const &testFlux, + std::string const &customOutput = "") + { + // Field names + std::vector const fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", "Energies"}; + + ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) + << "The fiducial flux, test flux, and field name vectors are not all " + "the same length" + << std::endl + << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl + << "testFlux.size() = " << testFlux.size() << std::endl + << "fieldNames.size() = " << fieldNames.size() << std::endl; + + // Check for equality + for (size_t i = 0; i < fieldNames.size(); i++) { + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + + bool areEqual = testing_utilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff); + EXPECT_TRUE(areEqual) << std::endl + << customOutput << std::endl + << "There's a difference in " << fieldNames[i] << " Flux" << std::endl + << "The fiducial value is: " << fiducialFlux[i] << std::endl + << "The test value is: " << testFlux[i] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; } - // ========================================================================= + } + // ===================================================================== +}; +// ========================================================================= + +// ========================================================================= +// Testing Calculate_HLLC_Fluxes_CUDA +/*! 
+* \brief Test the HLLC solver with the input from the high pressure side of a +sod shock tube. Correct results are hard coded into this test. Similar tests +do not need to be this verbose, simply passing values to the kernel call +should be sufficient in most cases +* +*/ +TEST_F(tHYDROCalculateHLLCFluxesCUDA, // Test suite name + HighPressureSideExpectCorrectOutput) // Test name +{ + // Physical Values + Real const density = 1.0; + Real const pressure = 1.0; + Real const velocityX = 0.0; + Real const velocityY = 0.0; + Real const velocityZ = 0.0; + Real const momentumX = density * velocityX; + Real const momentumY = density * velocityY; + Real const momentumZ = density * velocityZ; + Real const gamma = 1.4; + Real const energy = (pressure / (gamma - 1)) + + 0.5 * density * (velocityX * velocityX + velocityY * velocityY + velocityZ * velocityZ); + + std::vector const state{density, momentumX, momentumY, momentumZ, energy}; + std::vector const fiducialFluxes{0, 1, 0, 0, 0}; + + // Compute the fluxes + std::vector const testFluxes = Compute_Fluxes(state, // Left state + state, // Right state + gamma); // Adiabatic Index + + // Check for correctness + Check_Results(fiducialFluxes, testFluxes); +} +// ========================================================================= #endif diff --git a/src/riemann_solvers/hlld_cuda.cu b/src/riemann_solvers/hlld_cuda.cu index 489647bdb..80d6902c7 100644 --- a/src/riemann_solvers/hlld_cuda.cu +++ b/src/riemann_solvers/hlld_cuda.cu @@ -1,915 +1,513 @@ /*! 
* \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the implementation of the HLLD solver + * \brief Contains the implementation of the HLLD solver from Miyoshi & Kusano + * 2005 "A multi-state HLL approximate Riemann solver for ideal + * magnetohydrodynamics", hereafter referred to as M&K 2005 * -*/ + */ // External Includes // Local Includes -#include "../utils/gpu.hpp" #include "../global/global.h" #include "../global/global_cuda.h" -#include "../utils/mhd_utilities.h" +#include "../grid/grid_enum.h" #include "../riemann_solvers/hlld_cuda.h" +#include "../utils/cuda_utilities.h" +#include "../utils/gpu.hpp" +#include "../utils/hydro_utilities.h" +#include "../utils/math_utilities.h" +#include "../utils/mhd_utilities.h" -#ifdef DE //PRESSURE_DE - #include "../utils/hydro_utilities.h" -#endif // DE - -#ifdef CUDA - // ========================================================================= - __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, - Real *dev_flux, - int nx, - int ny, - int nz, - int n_ghost, - Real gamma, - int direction, - int n_fields) - { - // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int threadId = threadIdx.x + blockId * blockDim.x; - int zid = threadId / (nx*ny); - int yid = (threadId - zid*nx*ny) / nx; - int xid = threadId - zid*nx*ny - yid*nx; - - // Number of cells - int n_cells = nx*ny*nz; - - // Offsets & indices - int o1, o2, o3; - if (direction==0) {o1 = 1; o2 = 2; o3 = 3;} - if (direction==1) {o1 = 2; o2 = 3; o3 = 1;} - if (direction==2) {o1 = 3; o2 = 1; o3 = 2;} - - // Thread guard to avoid overrun - if (xid < nx and yid < ny and zid < nz) - { - // ============================ - // Retrieve conserved variables - // ============================ - // Left interface - Real densityL = dev_bounds_L[threadId]; - Real momentumXL = dev_bounds_L[threadId + n_cells * o1]; - Real momentumYL = dev_bounds_L[threadId + n_cells * o2]; - Real momentumZL = 
dev_bounds_L[threadId + n_cells * o3]; - Real energyL = dev_bounds_L[threadId + n_cells * 4]; - Real magneticXL = dev_bounds_L[threadId + n_cells * (o1 + 4 + NSCALARS)]; - Real magneticYL = dev_bounds_L[threadId + n_cells * (o2 + 4 + NSCALARS)]; - Real magneticZL = dev_bounds_L[threadId + n_cells * (o3 + 4 + NSCALARS)]; - - #ifdef SCALAR - Real scalarConservedL[NSCALARS]; - for (int i=0; i= 0.0) - { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityFluxL, - momentumFluxXL, momentumFluxYL, momentumFluxZL, - energyFluxL, - magneticFluxYL, magneticFluxZL); - #ifdef SCALAR - for (int i=0; i= 0.0) - { - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityStarFluxL, - momentumStarFluxXL, momentumStarFluxYL, momentumStarFluxZL, - energyStarFluxL, - magneticStarFluxYL, magneticStarFluxZL); - #ifdef SCALAR - for (int i=0; i= 0.0) - { - Real momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - _hlldInternal::_doubleStarFluxes(speedStarL, - momentumStarFluxXL, - momentumStarFluxYL, - momentumStarFluxZL, - energyStarFluxL, - magneticStarFluxYL, - magneticStarFluxZL, - densityStarL, - speedM, - velocityStarYL, - velocityStarZL, - energyStarL, - magneticStarYL, - magneticStarZL, - speedM, - velocityDoubleStarY, - velocityDoubleStarZ, - energyDoubleStarL, - magneticDoubleStarY, - magneticDoubleStarZ, - momentumDoubleStarFluxX, - momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, - magneticDoubleStarFluxZ); - - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityStarFluxL, - momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - - #ifdef SCALAR - // Return the passive scalar fluxes - for (int i=0; i= 0.0) - { - Real momentumDoubleStarFluxX, 
momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ; - _hlldInternal::_doubleStarFluxes(speedStarR, - momentumStarFluxXR, - momentumStarFluxYR, - momentumStarFluxZR, - energyStarFluxR, - magneticStarFluxYR, - magneticStarFluxZR, - densityStarR, - speedM, - velocityStarYR, - velocityStarZR, - energyStarR, - magneticStarYR, - magneticStarZR, - speedM, - velocityDoubleStarY, - velocityDoubleStarZ, - energyDoubleStarR, - magneticDoubleStarY, - magneticDoubleStarZ, - momentumDoubleStarFluxX, - momentumDoubleStarFluxY, - momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, - magneticDoubleStarFluxZ); - - _hlldInternal::_returnFluxes(threadId, o1, o2, o3, n_cells, - dev_flux, - densityStarFluxR, - momentumDoubleStarFluxX, momentumDoubleStarFluxY, momentumDoubleStarFluxZ, - energyDoubleStarFlux, - magneticDoubleStarFluxY, magneticDoubleStarFluxZ); - - #ifdef SCALAR - // Return the passive scalar fluxes - for (int i=0; i= n_cells) { + return; + } + + // Offsets & indices + int o1, o2, o3; + switch (direction) { + case 0: + o1 = grid_enum::momentum_x; + o2 = grid_enum::momentum_y; + o3 = grid_enum::momentum_z; + break; + case 1: + o1 = grid_enum::momentum_y; + o2 = grid_enum::momentum_z; + o3 = grid_enum::momentum_x; + break; + case 2: + o1 = grid_enum::momentum_z; + o2 = grid_enum::momentum_x; + o3 = grid_enum::momentum_y; + break; + } + + // ============================ + // Retrieve state variables + // ============================ + // The magnetic field in the X-direction + Real const magneticX = dev_magnetic_face[threadId]; + + mhd::internal::State const stateL = + mhd::internal::loadState(dev_bounds_L, magneticX, gamma, threadId, n_cells, o1, o2, o3); + mhd::internal::State const stateR = + mhd::internal::loadState(dev_bounds_R, magneticX, gamma, threadId, n_cells, o1, o2, o3); + + // Compute the approximate Left and Right wave speeds + mhd::internal::Speeds speed = 
mhd::internal::approximateLRWaveSpeeds(stateL, stateR, magneticX, gamma); + + // ================================================================= + // Compute the fluxes in the non-star states + // ================================================================= + // Left state + mhd::internal::Flux fluxL = mhd::internal::nonStarFluxes(stateL, magneticX); + + // If we're in the L state then assign fluxes and return. + // In this state the flow is supersonic + // M&K 2005 equation 66 + if (speed.L > 0.0) { + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); + return; + } + // Right state + mhd::internal::Flux fluxR = mhd::internal::nonStarFluxes(stateR, magneticX); + + // If we're in the R state then assign fluxes and return. + // In this state the flow is supersonic + // M&K 2005 equation 66 + if (speed.R < 0.0) { + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); + return; + } + + // ================================================================= + // Compute the fluxes in the star states + // ================================================================= + // Shared quantities: + // - velocityStarX = speedM + // - totalPrssureStar is the same on both sides + speed.M = approximateMiddleWaveSpeed(stateL, stateR, speed); + Real const totalPressureStar = mhd::internal::starTotalPressure(stateL, stateR, speed); + + // Left star state + mhd::internal::StarState const starStateL = + mhd::internal::computeStarState(stateL, speed, speed.L, magneticX, totalPressureStar); + + // Left star speed + speed.LStar = mhd::internal::approximateStarWaveSpeed(starStateL, speed, magneticX, -1); + + // If we're in the L* state then assign fluxes and return. 
+ // In this state the flow is subsonic + // M&K 2005 equation 66 + if (speed.LStar > 0.0 and speed.L <= 0.0) { + fluxL = mhd::internal::starFluxes(starStateL, stateL, fluxL, speed, speed.L); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); + return; + } + + // Right star state + mhd::internal::StarState const starStateR = + mhd::internal::computeStarState(stateR, speed, speed.R, magneticX, totalPressureStar); + + // Right star speed + speed.RStar = mhd::internal::approximateStarWaveSpeed(starStateR, speed, magneticX, 1); + + // If we're in the R* state then assign fluxes and return. + // In this state the flow is subsonic + // M&K 2005 equation 66 + if (speed.RStar <= 0.0 and speed.R >= 0.0) { + fluxR = mhd::internal::starFluxes(starStateR, stateR, fluxR, speed, speed.R); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); + return; + } + + // ================================================================= + // Compute the fluxes in the double star states + // ================================================================= + mhd::internal::DoubleStarState const doubleStarState = + mhd::internal::computeDoubleStarState(starStateL, starStateR, magneticX, totalPressureStar, speed); + + // Compute and return L** fluxes + // M&K 2005 equation 66 + if (speed.M > 0.0 and speed.LStar <= 0.0) { + fluxL = mhd::internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyL, starStateL, stateL, fluxL, + speed, speed.L, speed.LStar); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxL, stateL); + return; + } + // Compute and return R** fluxes + // M&K 2005 equation 66 + if (speed.RStar > 0.0 and speed.M <= 0.0) { + fluxR = mhd::internal::computeDoubleStarFluxes(doubleStarState, doubleStarState.energyR, starStateR, stateR, fluxR, + speed, speed.R, speed.RStar); + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, dev_flux, fluxR, stateR); + return; + } +} 
+// ========================================================================= + +namespace internal +{ +// ===================================================================== +__device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, + int const &o2, int const &o3) +{ + mhd::internal::State state; + state.density = interfaceArr[threadId + n_cells * grid_enum::density]; + state.density = fmax(state.density, (Real)TINY_NUMBER); + state.velocityX = interfaceArr[threadId + n_cells * o1] / state.density; + state.velocityY = interfaceArr[threadId + n_cells * o2] / state.density; + state.velocityZ = interfaceArr[threadId + n_cells * o3] / state.density; + state.energy = interfaceArr[threadId + n_cells * grid_enum::Energy]; + state.energy = fmax(state.energy, (Real)TINY_NUMBER); + state.magneticY = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_y]; + state.magneticZ = interfaceArr[threadId + n_cells * grid_enum::Q_x_magnetic_z]; + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + state.scalarSpecific[i] = interfaceArr[threadId + n_cells * (grid_enum::scalar + i)] / state.density; + } + #endif // SCALAR + #ifdef DE + state.thermalEnergySpecific = interfaceArr[threadId + n_cells * grid_enum::GasEnergy] / state.density; + + Real energyNonThermal = hydro_utilities::Calc_Kinetic_Energy_From_Velocity(state.density, state.velocityX, + state.velocityY, state.velocityZ) + + mhd::utils::computeMagneticEnergy(magneticX, state.magneticY, state.magneticZ); + + state.gasPressure = fmax(hydro_utilities::Get_Pressure_From_DE(state.energy, state.energy - energyNonThermal, + state.thermalEnergySpecific * state.density, gamma), + (Real)TINY_NUMBER); + #else + // Note that this function does the positive pressure check + // internally + state.gasPressure = mhd::internal::Calc_Pressure_Primitive(state, magneticX, gamma); + #endif // DE + + 
state.totalPressure = + mhd::utils::computeTotalPressure(state.gasPressure, magneticX, state.magneticY, state.magneticZ); + + return state; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ mhd::internal::Speeds approximateLRWaveSpeeds(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + Real const &magneticX, Real const &gamma) +{ + // Get the fast magnetosonic wave speeds + Real magSonicL = mhd::utils::fastMagnetosonicSpeed(stateL.density, stateL.gasPressure, magneticX, stateL.magneticY, + stateL.magneticZ, gamma); + Real magSonicR = mhd::utils::fastMagnetosonicSpeed(stateR.density, stateR.gasPressure, magneticX, stateR.magneticY, + stateR.magneticZ, gamma); + + // Compute the S_L and S_R wave speeds. + // Version suggested by Miyoshi & Kusano 2005 and used in Athena + // M&K 2005 equation 67 + Real magSonicMax = fmax(magSonicL, magSonicR); + mhd::internal::Speeds speed; + speed.L = fmin(stateL.velocityX, stateR.velocityX) - magSonicMax; + speed.R = fmax(stateL.velocityX, stateR.velocityX) + magSonicMax; + + return speed; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ Real approximateMiddleWaveSpeed(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed) +{ + // Compute the S_M wave speed + // M&K 2005 equation 38 + Real const speed_r_diff = speed.R - stateR.velocityX; + Real const speed_l_diff = speed.L - stateL.velocityX; + + return // Numerator + (speed_r_diff * stateR.density * stateR.velocityX - speed_l_diff * stateL.density * stateL.velocityX - + stateR.totalPressure + stateL.totalPressure) / + // Denominator + (speed_r_diff * stateR.density - speed_l_diff * stateL.density); +} +// 
===================================================================== + +// ===================================================================== +__device__ __host__ Real approximateStarWaveSpeed(mhd::internal::StarState const &starState, + mhd::internal::Speeds const &speed, Real const &magneticX, + Real const &side) +{ + // Compute the S_L^* and S_R^* wave speeds + // M&K 2005 equation 51 + return speed.M + side * mhd::utils::alfvenSpeed(magneticX, starState.density); +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ mhd::internal::Flux nonStarFluxes(mhd::internal::State const &state, Real const &magneticX) +{ + mhd::internal::Flux flux; + // M&K 2005 equation 2 + flux.density = state.density * state.velocityX; + + flux.momentumX = flux.density * state.velocityX + state.totalPressure - magneticX * magneticX; + flux.momentumY = flux.density * state.velocityY - magneticX * state.magneticY; + flux.momentumZ = flux.density * state.velocityZ - magneticX * state.magneticZ; + + flux.magneticY = state.magneticY * state.velocityX - magneticX * state.velocityY; + flux.magneticZ = state.magneticZ * state.velocityX - magneticX * state.velocityZ; + + // Group transverse terms for FP associative symmetry + flux.energy = state.velocityX * (state.energy + state.totalPressure) - + magneticX * (state.velocityX * magneticX + + ((state.velocityY * state.magneticY) + (state.velocityZ * state.magneticZ))); + + return flux; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, mhd::internal::Flux const &flux, + mhd::internal::State const &state) +{ + // Note that the direction of the grid_enum::fluxX_magnetic_DIR is the + // 
direction of the electric field that the magnetic flux is, not the magnetic + // flux + dev_flux[threadId + n_cells * grid_enum::density] = flux.density; + dev_flux[threadId + n_cells * o1] = flux.momentumX; + dev_flux[threadId + n_cells * o2] = flux.momentumY; + dev_flux[threadId + n_cells * o3] = flux.momentumZ; + dev_flux[threadId + n_cells * grid_enum::Energy] = flux.energy; + dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_z] = flux.magneticY; + dev_flux[threadId + n_cells * grid_enum::fluxX_magnetic_y] = flux.magneticZ; + + #ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + dev_flux[threadId + n_cells * (grid_enum::scalar + i)] = state.scalarSpecific[i] * flux.density; + } + #endif // SCALAR + #ifdef DE + dev_flux[threadId + n_cells * grid_enum::GasEnergy] = state.thermalEnergySpecific * flux.density; + #endif // DE +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ Real starTotalPressure(mhd::internal::State const &stateL, mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed) +{ + // M&K 2005 equation 41 + return // Numerator + (stateR.density * stateL.totalPressure * (speed.R - stateR.velocityX) - + stateL.density * stateR.totalPressure * (speed.L - stateL.velocityX) + + stateL.density * stateR.density * (speed.R - stateR.velocityX) * (speed.L - stateL.velocityX) * + (stateR.velocityX - stateL.velocityX)) / + // Denominator + (stateR.density * (speed.R - stateR.velocityX) - stateL.density * (speed.L - stateL.velocityX)); +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ mhd::internal::StarState computeStarState(mhd::internal::State const &state, + mhd::internal::Speeds const &speed, Real const &speedSide, + Real const &magneticX, Real const &totalPressureStar) +{ + 
mhd::internal::StarState starState; + + // Compute the densities in the star state + // M&K 2005 equation 43 + starState.density = state.density * (speedSide - state.velocityX) / (speedSide - speed.M); + + // Check for and handle the degenerate case + // Explained at the top of page 326 in M&K 2005 + if (fabs(state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX)) < + totalPressureStar * mhd::internal::_hlldSmallNumber) { + starState.velocityY = state.velocityY; + starState.velocityZ = state.velocityZ; + starState.magneticY = state.magneticY; + starState.magneticZ = state.magneticZ; + } else { + // Denominator for M&K 2005 equations 44-47 + Real const denom = state.density * (speedSide - state.velocityX) * (speedSide - speed.M) - (magneticX * magneticX); + + // Compute the velocity and magnetic field in the star state + // M&K 2005 equations 44 & 46 + Real coef = magneticX * (speed.M - state.velocityX) / denom; + starState.velocityY = state.velocityY - state.magneticY * coef; + starState.velocityZ = state.velocityZ - state.magneticZ * coef; + + // M&K 2005 equations 45 & 47 + Real tmpPower = (speedSide - state.velocityX); + tmpPower = tmpPower * tmpPower; + coef = (state.density * tmpPower - (magneticX * magneticX)) / denom; + starState.magneticY = state.magneticY * coef; + starState.magneticZ = state.magneticZ * coef; + } + + // M&K 2005 equation 48 + starState.energy = (state.energy * (speedSide - state.velocityX) - state.totalPressure * state.velocityX + + totalPressureStar * speed.M + + magneticX * (math_utils::dotProduct(state.velocityX, state.velocityY, state.velocityZ, magneticX, + state.magneticY, state.magneticZ) - + math_utils::dotProduct(speed.M, starState.velocityY, starState.velocityZ, magneticX, + starState.magneticY, starState.magneticZ))) / + (speedSide - speed.M); + + return starState; +} +// ===================================================================== + +// 
===================================================================== +__device__ __host__ mhd::internal::Flux starFluxes(mhd::internal::StarState const &starState, + mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide) +{ + mhd::internal::Flux starFlux; + + // Now compute the star state fluxes + // M&K 2005 equations 64 + starFlux.density = flux.density + speedSide * (starState.density - state.density); + starFlux.momentumX = flux.momentumX + speedSide * (starState.density * speed.M - state.density * state.velocityX); + starFlux.momentumY = + flux.momentumY + speedSide * (starState.density * starState.velocityY - state.density * state.velocityY); + starFlux.momentumZ = + flux.momentumZ + speedSide * (starState.density * starState.velocityZ - state.density * state.velocityZ); + starFlux.energy = flux.energy + speedSide * (starState.energy - state.energy); + starFlux.magneticY = flux.magneticY + speedSide * (starState.magneticY - state.magneticY); + starFlux.magneticZ = flux.magneticZ + speedSide * (starState.magneticZ - state.magneticZ); + + return starFlux; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ mhd::internal::DoubleStarState computeDoubleStarState(mhd::internal::StarState const &starStateL, + mhd::internal::StarState const &starStateR, + Real const &magneticX, + Real const &totalPressureStar, + mhd::internal::Speeds const &speed) +{ + mhd::internal::DoubleStarState doubleStarState; + + // if Bx is zero then just return the star state + // Explained at the top of page 328 in M&K 2005. 
Essentially when + // magneticX is 0 this reduces to the HLLC solver + if (0.5 * (magneticX * magneticX) < mhd::internal::_hlldSmallNumber * totalPressureStar) { + if (speed.M >= 0.0) { + // We're in the L** state but Bx=0 so return L* state + doubleStarState.velocityY = starStateL.velocityY; + doubleStarState.velocityZ = starStateL.velocityZ; + doubleStarState.magneticY = starStateL.magneticY; + doubleStarState.magneticZ = starStateL.magneticZ; + doubleStarState.energyL = starStateL.energy; + } else { + // We're in the R** state but Bx=0 so return R* state + doubleStarState.velocityY = starStateR.velocityY; + doubleStarState.velocityZ = starStateR.velocityZ; + doubleStarState.magneticY = starStateR.magneticY; + doubleStarState.magneticZ = starStateR.magneticZ; + doubleStarState.energyR = starStateR.energy; + } + } else { + // Setup some variables we'll need later + Real sqrtDL = sqrt(starStateL.density); + Real sqrtDR = sqrt(starStateR.density); + Real inverseDensities = 1.0 / (sqrtDL + sqrtDR); + Real magXSign = copysign(1.0, magneticX); + + // All we need to do now is compute the transverse velocities + // and magnetic fields along with the energy + + // Double Star velocities + // M&K 2005 equations 59 & 60 + doubleStarState.velocityY = inverseDensities * (sqrtDL * starStateL.velocityY + sqrtDR * starStateR.velocityY + + magXSign * (starStateR.magneticY - starStateL.magneticY)); + doubleStarState.velocityZ = inverseDensities * (sqrtDL * starStateL.velocityZ + sqrtDR * starStateR.velocityZ + + magXSign * (starStateR.magneticZ - starStateL.magneticZ)); + + // Double star magnetic fields + // M&K 2005 equations 61 & 62 + doubleStarState.magneticY = + inverseDensities * (sqrtDL * starStateR.magneticY + sqrtDR * starStateL.magneticY + + magXSign * (sqrtDL * sqrtDR) * (starStateR.velocityY - starStateL.velocityY)); + doubleStarState.magneticZ = + inverseDensities * (sqrtDL * starStateR.magneticZ + sqrtDR * starStateL.magneticZ + + magXSign * (sqrtDL * sqrtDR) * 
(starStateR.velocityZ - starStateL.velocityZ)); + + // Double star energy + Real velDblStarDotMagDblStar = + math_utils::dotProduct(speed.M, doubleStarState.velocityY, doubleStarState.velocityZ, magneticX, + doubleStarState.magneticY, doubleStarState.magneticZ); + // M&K 2005 equation 63 + doubleStarState.energyL = + starStateL.energy - sqrtDL * magXSign * + (math_utils::dotProduct(speed.M, starStateL.velocityY, starStateL.velocityZ, magneticX, + starStateL.magneticY, starStateL.magneticZ) - + velDblStarDotMagDblStar); + doubleStarState.energyR = + starStateR.energy + sqrtDR * magXSign * + (math_utils::dotProduct(speed.M, starStateR.velocityY, starStateR.velocityZ, magneticX, + starStateR.magneticY, starStateR.magneticZ) - + velDblStarDotMagDblStar); + } + + return doubleStarState; +} +// ===================================================================== + +// ===================================================================== +__device__ __host__ mhd::internal::Flux computeDoubleStarFluxes( + mhd::internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, + mhd::internal::StarState const &starState, mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar) +{ + mhd::internal::Flux doubleStarFlux; + + Real const speed_diff = speedSideStar - speedSide; + + // M&K 2005 equation 65 + doubleStarFlux.density = + flux.density - speedSide * state.density - speed_diff * starState.density + speedSideStar * starState.density; + + doubleStarFlux.momentumX = flux.momentumX - speedSide * (state.density * state.velocityX) - + speed_diff * (starState.density * speed.M) + speedSideStar * (starState.density * speed.M); + doubleStarFlux.momentumY = flux.momentumY - speedSide * (state.density * state.velocityY) - + speed_diff * (starState.density * starState.velocityY) + + speedSideStar * (starState.density * doubleStarState.velocityY); + 
doubleStarFlux.momentumZ = flux.momentumZ - speedSide * (state.density * state.velocityZ) - + speed_diff * (starState.density * starState.velocityZ) + + speedSideStar * (starState.density * doubleStarState.velocityZ); + doubleStarFlux.energy = + flux.energy - speedSide * state.energy - speed_diff * starState.energy + speedSideStar * doubleStarStateEnergy; + doubleStarFlux.magneticY = flux.magneticY - speedSide * state.magneticY - speed_diff * starState.magneticY + + speedSideStar * doubleStarState.magneticY; + doubleStarFlux.magneticZ = flux.magneticZ - speedSide * state.magneticZ - speed_diff * starState.magneticZ + + speedSideStar * doubleStarState.magneticZ; + + return doubleStarFlux; +} +// ===================================================================== + +} // namespace internal +} // end namespace mhd +#endif // MHD diff --git a/src/riemann_solvers/hlld_cuda.h b/src/riemann_solvers/hlld_cuda.h index d8d58dce1..8c547e889 100644 --- a/src/riemann_solvers/hlld_cuda.h +++ b/src/riemann_solvers/hlld_cuda.h @@ -1,7 +1,9 @@ /*! * \file hlld_cuda.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the declaration of the HLLD solver + * \brief Contains the declaration of the HLLD solver from Miyoshi & Kusano 2005 + * "A multi-state HLL approximate Riemann solver for ideal + * magnetohydrodynamics", hereafter referred to as M&K 2005 * */ @@ -11,385 +13,257 @@ // Local Includes #include "../global/global.h" +#include "../utils/hydro_utilities.h" -#ifdef CUDA +/*! + * \brief Namespace for MHD code + * + */ +namespace mhd +{ +/*! + * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 + * + * \param[in] dev_bounds_L The interface states on the left side of the + * interface + * \param[in] dev_bounds_R The interface states on the right side of + * the interface + * \param[in] dev_magnetic_face A pointer to the begining of the + * conserved magnetic field array that is stored at the interface. I.e. 
for the + * X-direction solve this would be the beginning of the X-direction fields + * \param[out] dev_flux The output flux + * \param[in] n_cells Total number of cells + * \param[in] gamma The adiabatic index + * \param[in] direction The direction that the solve is taking place in. 0=X, 1=Y, + * 2=Z + * \param[in] n_fields The total number of fields + */ +__global__ void Calculate_HLLD_Fluxes_CUDA(Real const *dev_bounds_L, Real const *dev_bounds_R, + Real const *dev_magnetic_face, Real *dev_flux, int const n_cells, + Real const gamma, int const direction, int const n_fields); + +/*! + * \brief Namespace to hold private functions used within the HLLD + * solver + * + */ +namespace internal +{ +/*! + * \brief Used for some comparisons. Value was chosen to match what is + * used in Athena + */ +Real static const _hlldSmallNumber = 1.0e-8; + +/*! + * \brief Holds all the data needed for the non-star states of the HLLD solver + * + */ +struct State { + Real density, velocityX, velocityY, velocityZ, energy, magneticY, magneticZ, gasPressure, totalPressure; +#ifdef SCALAR + Real scalarSpecific[grid_enum::nscalars]; +#endif // SCALAR +#ifdef DE + Real thermalEnergySpecific; +#endif // DE +}; + +/*! + * \brief Holds all the data needed for the star states of the HLLD solver + * except total pressure and x velocity as those are shared between the left and + * right states + * + */ +struct StarState { + // velocityStarX = Speeds.M + // Total pressure is computed on its own since it's shared + Real density, velocityY, velocityZ, energy, magneticY, magneticZ; +}; + +/*! + * \brief Holds all the data needed for the double star states of the HLLD + * solver except the x velocity, density, and total pressure since those are all + * inherited from the star state. 
+ * + */ +struct DoubleStarState { + // velocityDoubleStarX = Speeds.M + // densityDoubleStar = densityStar + // pressureDoubleStar = pressureStar + // Shared values + Real velocityY, velocityZ, magneticY, magneticZ; + // Different values. Initializing these since one or the other can be left uninitialized, leading to bad tests + Real energyL = 0.0, energyR = 0.0; +}; + +/*! + * \brief Holds all the data needed for the fluxes in the HLLD solver + * + */ +struct Flux { + Real density, momentumX, momentumY, momentumZ, energy, magneticY, magneticZ; +}; + +/*! + * \brief Holds all the data needed for the speeds in the HLLD solver + * + */ +struct Speeds { + Real L, LStar, M, RStar, R; +}; + +/*! + * \brief Load and compute the left or right state + * + * \param interfaceArr The interface array to load from + * \param magneticX The X magnetic field + * \param gamma The adiabatic index + * \param threadId The thread ID + * \param n_cells Total number of cells + * \param o1 Direction parameter + * \param o2 Direction parameter + * \param o3 Direction parameter + * \return mhd::internal::State The loaded state + */ +__device__ __host__ mhd::internal::State loadState(Real const *interfaceArr, Real const &magneticX, Real const &gamma, + int const &threadId, int const &n_cells, int const &o1, + int const &o2, int const &o3); - /*! - * \brief Compute the HLLD fluxes from Miyoshi & Kusano 2005 - * - * \param[in] dev_bounds_L - * \param[in] dev_bounds_R - * \param[out] dev_flux - * \param[in] nx - * \param[in] ny - * \param[in] nz - * \param[in] n_ghost - * \param[in] gamma - * \param[in] dir - * \param[in] n_fields - */ - __global__ void Calculate_HLLD_Fluxes_CUDA(Real *dev_bounds_L, - Real *dev_bounds_R, - Real *dev_flux, - int nx, - int ny, - int nz, - int n_ghost, - Real gamma, - int direction, - int n_fields); +/*! + * \brief Compute the approximate left and right wave speeds. 
M&K 2005 equation + * 67 + */ +__device__ __host__ mhd::internal::Speeds approximateLRWaveSpeeds(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + Real const &magneticX, Real const &gamma); - /*! - * \brief Namespace to hold private functions used within the HLLD - * solver - * - */ - namespace _hlldInternal - { - /*! - * \brief Used for some comparisons. Value was chosen to match what is - * used in Athena - */ - Real static const _hlldSmallNumber = 1.0e-8; +/*! + * \brief Compute the approximate middle wave speed. M&K 2005 equation 38 + */ +__device__ __host__ Real approximateMiddleWaveSpeed(mhd::internal::State const &stateL, + mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed); - /*! - * \brief Compute the left, right, star, and middle wave speeds. Also - * returns the densities in the star states - * - * \param[in] densityL Density, left side - * \param[in] momentumXL Momentum in the X-direction, left side - * \param[in] momentumYL Momentum in the Y-direction, left side - * \param[in] momentumZL Momentum in the Z-direction, left side - * \param[in] velocityXL Velocity in the X-direction, left side - * \param[in] velocityYL Velocity in the Y-direction, left side - * \param[in] velocityZL Velocity in the Z-direction, left side - * \param[in] gasPressureL Gas pressure, left side - * \param[in] totalPressureL Total MHD pressure, left side - * \param[in] magneticXL Magnetic field in the X-direction, left side - * \param[in] magneticYL Magnetic field in the Y-direction, left side - * \param[in] magneticZL Magnetic field in the Z-direction, left side - * \param[in] densityR Density, right side - * \param[in] momentumXR Momentum in the X-direction, right side - * \param[in] momentumYR Momentum in the Y-direction, right side - * \param[in] momentumZR Momentum in the Z-direction, right side - * \param[in] velocityXR Velocity in the X-direction, right side - * \param[in] velocityYR Velocity in the Y-direction, right side - 
- * \param[in] velocityZR Velocity in the Z-direction, right side - * \param[in] gasPressureR Gas pressure, right side - * \param[in] totalPressureR Total MHD pressure, right side - * \param[in] magneticXR Magnetic field in the X-direction, right side - * \param[in] magneticYR Magnetic field in the Y-direction, right side - * \param[in] magneticZR Magnetic field in the Z-direction, right side - * \param[in] gamma Adiabatic index - * \param[out] speedL Approximate speed of the left most wave - * \param[out] speedR Approximate speed of the right most wave - * \param[out] speedM Speed of the middle wave - * \param[out] speedStarL Speed of the left star state wave - * \param[out] speedStarR Speed of the right star state wave - * \param[out] densityStarL Density in left star region - * \param[out] densityStarR Density in right star region - */ - __device__ __host__ void _approximateWaveSpeeds(Real const &densityL, - Real const &momentumXL, - Real const &momentumYL, - Real const &momentumZL, - Real const &velocityXL, - Real const &velocityYL, - Real const &velocityZL, - Real const &gasPressureL, - Real const &totalPressureL, - Real const &magneticXL, - Real const &magneticYL, - Real const &magneticZL, - Real const &densityR, - Real const &momentumXR, - Real const &momentumYR, - Real const &momentumZR, - Real const &velocityXR, - Real const &velocityYR, - Real const &velocityZR, - Real const &gasPressureR, - Real const &totalPressureR, - Real const &magneticXR, - Real const &magneticYR, - Real const &magneticZR, - Real const &gamma, - Real &speedL, - Real &speedR, - Real &speedM, - Real &speedStarL, - Real &speedStarR, - Real &densityStarL, - Real &densityStarR); +/*! + * \brief Compute the approximate star wave speed (S_L^* or S_R^*). M&K 2005 + * equation 51 + */ +__device__ __host__ Real approximateStarWaveSpeed(mhd::internal::StarState const &starState, + mhd::internal::Speeds const &speed, Real const &magneticX, + Real const &side); - /*! 
- * \brief Compute the fluxes in the left or right non-star state - * - * \param[in] momentumX Momentum in the X-direction - * \param[in] velocityX Velocity in the X-direction - * \param[in] velocityY Velocity in the Y-direction - * \param[in] velocityZ Velocity in the Z-direction - * \param[in] totalPressure Total MHD pressure - * \param[in] energy Energy - * \param[in] magneticX Magnetic field in -direction - * \param[in] magneticY Magnetic field in -direction - * \param[in] magneticZ Magnetic field in -direction - * \param[out] densityFlux The density flux - * \param[out] momentumFluxX The momentum flux in the X-direction - * \param[out] momentumFluxY The momentum flux in the Y-direction - * \param[out] momentumFluxZ The momentum flux in the Z-direction - * \param[out] magneticFluxY The magnetic field flux in the Y-direction - * \param[out] magneticFluxZ The magnetic field flux in the Z-direction - * \param[out] energyFlux The energy flux - */ - __device__ __host__ void _nonStarFluxes(Real const &momentumX, - Real const &velocityX, - Real const &velocityY, - Real const &velocityZ, - Real const &totalPressure, - Real const &energy, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real &densityFlux, - Real &momentumFluxX, - Real &momentumFluxY, - Real &momentumFluxZ, - Real &magneticFluxY, - Real &magneticFluxZ, - Real &energyFlux); +/*! + * \brief Compute the fluxes in the left or right non-star state. M&K 2005 + * equation 2 + * + * \param state The state to compute the flux of + * \param magneticX The X magnetic field + * \return mhd::internal::Flux The flux in the state + */ +__device__ __host__ mhd::internal::Flux nonStarFluxes(mhd::internal::State const &state, Real const &magneticX); - /*! 
- * \brief Assign the given flux values to the dev_flux array - * - * \param[in] threadId The thread ID - * \param[in] o1 Offset to get indexing right - * \param[in] o2 Offset to get indexing right - * \param[in] o3 Offset to get indexing right - * \param[in] n_cells Number of cells - * \param[out] dev_flux The flux array - * \param[in] densityFlux The density flux - * \param[in] momentumFluxX The momentum flux in the X-direction - * \param[in] momentumFluxY The momentum flux in the Y-direction - * \param[in] momentumFluxZ The momentum flux in the Z-direction - * \param[in] magneticFluxY The magnetic field flux in the X-direction - * \param[in] magneticFluxZ The magnetic field flux in the Y-direction - * \param[in] energyFlux The energy flux - */ - __device__ __host__ void _returnFluxes(int const &threadId, - int const &o1, - int const &o2, - int const &o3, - int const &n_cells, - Real *dev_flux, - Real const &densityFlux, - Real const &momentumFluxX, - Real const &momentumFluxY, - Real const &momentumFluxZ, - Real const &magneticFluxY, - Real const &magneticFluxZ, - Real const &energyFlux); +/*! + * \brief Write the given flux values to the dev_flux array + * + * \param[in] threadId The thread ID + * \param[in] o1 Offset to get indexing right + * \param[in] o2 Offset to get indexing right + * \param[in] o3 Offset to get indexing right + * \param[in] n_cells Number of cells + * \param[out] dev_flux The flux array + * \param[in] flux The fluxes to write out + * \param[in] state The left or right state depending on if this is a return for + * one of the left states or one of the right states + */ +__device__ __host__ void returnFluxes(int const &threadId, int const &o1, int const &o2, int const &o3, + int const &n_cells, Real *dev_flux, mhd::internal::Flux const &flux, + mhd::internal::State const &state); - /*! 
- * \brief Compute the fluxes in the left or right star state - * - * \param[in] speedM Speed of the central wave - * \param[in] speedSide Speed of the non-star wave on the side being computed - * \param[in] density Density - * \param[in] velocityX Velocity in the X-direction - * \param[in] velocityY Velocity in the Y-direction - * \param[in] velocityZ Velocity in the Z-direction - * \param[in] momentumX Momentum in the X-direction - * \param[in] momentumY Momentum in the Y-direction - * \param[in] momentumZ Momentum in the Z-direction - * \param[in] energy Energy - * \param[in] totalPressure Total MHD pressure - * \param[in] magneticX Magnetic field in the X-direction - * \param[in] magneticY Magnetic field in the Y-direction - * \param[in] magneticZ Magnetic field in the Z-direction - * \param[in] densityStar Density in the star state - * \param[in] totalPressureStar Total MHD pressure in the star state - * \param[in] densityFlux Density Flux from the non-star state - * \param[in] momentumFluxX Momentum flux from the non-star state in the X-direction - * \param[in] momentumFluxY Momentum flux from the non-star state in the Y-direction - * \param[in] momentumFluxZ Momentum flux from the non-star state in the Z-direction - * \param[in] energyFlux Energy flux from the non-star state - * \param[in] magneticFluxY Magnetic flux from the non-star state in the X-direction - * \param[in] magneticFluxZ Magnetic flux from the non-star state in the Y-direction - * \param[out] velocityStarY Velocity in the star state in the Y-direction - * \param[out] velocityStarZ Velocity in the star state in the Z-direction - * \param[out] energyStar Energy in the star state - * \param[out] magneticStarY Magnetic field in the star state in the X-direction - * \param[out] magneticStarZ Magnetic field in the star state in the Y-direction - * \param[out] densityStarFlux Density flux in the star state - * \param[out] momentumStarFluxX Momentum flux in the star state in the X-direction - * 
\param[out] momentumStarFluxY Momentum flux in the star state in the Y-direction - * \param[out] momentumStarFluxZ Momentum flux in the star state in the Z-direction - * \param[out] energyStarFlux Energy flux in the star state - * \param[out] magneticStarFluxY Magnetic field flux in the star state in the X-direction - * \param[out] magneticStarFluxZ Magnetic field flux in the star state in the Y-direction - * - */ - __device__ __host__ void _starFluxes(Real const &speedM, - Real const &speedSide, - Real const &density, - Real const &velocityX, - Real const &velocityY, - Real const &velocityZ, - Real const &momentumX, - Real const &momentumY, - Real const &momentumZ, - Real const &energy, - Real const &totalPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &densityStar, - Real const &totalPressureStar, - Real const &densityFlux, - Real const &momentumFluxX, - Real const &momentumFluxY, - Real const &momentumFluxZ, - Real const &energyFlux, - Real const &magneticFluxY, - Real const &magneticFluxZ, - Real &velocityStarY, - Real &velocityStarZ, - Real &energyStar, - Real &magneticStarY, - Real &magneticStarZ, - Real &densityStarFlux, - Real &momentumStarFluxX, - Real &momentumStarFluxY, - Real &momentumStarFluxZ, - Real &energyStarFlux, - Real &magneticStarFluxY, - Real &magneticStarFluxZ); +/*! + * \brief Compute the total pressure in the star states. M&K 2005 equation 41 + * + * \param stateL The left state + * \param stateR The right state + * \param speed The wave speeds + * \return Real The total pressure in the star state + */ +__device__ __host__ Real starTotalPressure(mhd::internal::State const &stateL, mhd::internal::State const &stateR, + mhd::internal::Speeds const &speed); - /*! - * \brief Compute the dot product of a and b. 
- * - * \param[in] a1 The first element of a - * \param[in] a2 The second element of a - * \param[in] a3 The third element of a - * \param[in] b1 The first element of b - * \param[in] b2 The second element of b - * \param[in] b3 The third element of b - * - * \return Real The dot product of a and b - */ - inline __device__ __host__ Real _dotProduct(Real const &a1, - Real const &a2, - Real const &a3, - Real const &b1, - Real const &b2, - Real const &b3) - {return a1*b1 + ((a2*b2) + (a3*b3));}; +/*! + * \brief Compute the L* or R* state. M&K 2005 equations 43-48 + * + * \param state The non-star state on the same side as the desired star + * state \param speed The wavespeeds \param speedSide The wave speed on the + * same side as the desired star state \param magneticX The magnetic field + * in the x direction \param totalPressureStar The total pressure in the + * star state \return mhd::internal::StarState The computed star state + */ +__device__ __host__ mhd::internal::StarState computeStarState(mhd::internal::State const &state, + mhd::internal::Speeds const &speed, Real const &speedSide, + Real const &magneticX, Real const &totalPressureStar); - /*! 
- * \brief Compute the double star state - * - * \param[in] speedM - * \param[in] magneticX - * \param[in] totalPressureStar - * \param[in] densityStarL - * \param[in] velocityStarYL - * \param[in] velocityStarZL - * \param[in] energyStarL - * \param[in] magneticStarYL - * \param[in] magneticStarZL - * \param[in] densityStarR - * \param[in] velocityStarYR - * \param[in] velocityStarZR - * \param[in] energyStarR - * \param[in] magneticStarYR - * \param[in] magneticStarZR - * \param[out] velocityDoubleStarY - * \param[out] velocityDoubleStarZ - * \param[out] magneticDoubleStarY - * \param[out] magneticDoubleStarZ - * \param[out] energyDoubleStarL - * \param[out] energyDoubleStarR - */ - __device__ __host__ void _doubleStarState(Real const &speedM, - Real const &magneticX, - Real const &totalPressureStar, - Real const &densityStarL, - Real const &velocityStarYL, - Real const &velocityStarZL, - Real const &energyStarL, - Real const &magneticStarYL, - Real const &magneticStarZL, - Real const &densityStarR, - Real const &velocityStarYR, - Real const &velocityStarZR, - Real const &energyStarR, - Real const &magneticStarYR, - Real const &magneticStarZR, - Real &velocityDoubleStarY, - Real &velocityDoubleStarZ, - Real &magneticDoubleStarY, - Real &magneticDoubleStarZ, - Real &energyDoubleStarL, - Real &energyDoubleStarR); +/*! + * \brief Compute the flux in the star state. 
M&K 2005 equation 64 + * + * \param starState The star state to compute the flux of + * \param state The non-star state on the same side as the star state + * \param flux The non-star flux on the same side as the star state + * \param speed The wave speeds + * \param speedSide The non-star wave speed on the same side as the star state + * \return mhd::internal::Flux The flux in the star state + */ +__device__ __host__ mhd::internal::Flux starFluxes(mhd::internal::StarState const &starState, + mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide); - /*! - * \brief Compute the double star state fluxes - * - * \param[in] speedStarSide The star speed on the side being computed - * \param[in] momentumStarFluxX - * \param[in] momentumStarFluxY - * \param[in] momentumStarFluxZ - * \param[in] energyStarFlux - * \param[in] magneticStarFluxY - * \param[in] magneticStarFluxZ - * \param[in] densityStar - * \param[in] velocityStarX - * \param[in] velocityStarY - * \param[in] velocityStarZ - * \param[in] energyStar - * \param[in] magneticStarY - * \param[in] magneticStarZ - * \param[in] velocityDoubleStarX - * \param[in] velocityDoubleStarY - * \param[in] velocityDoubleStarZ - * \param[in] energyDoubleStar - * \param[in] magneticDoubleStarY - * \param[in] magneticDoubleStarZ - * \param[out] momentumDoubleStarFluxX - * \param[out] momentumDoubleStarFluxY - * \param[out] momentumDoubleStarFluxZ - * \param[out] energyDoubleStarFlux - * \param[out] magneticDoubleStarFluxY - * \param[out] magneticDoubleStarFluxZ - */ - __device__ __host__ void _doubleStarFluxes(Real const &speedStarSide, - Real const &momentumStarFluxX, - Real const &momentumStarFluxY, - Real const &momentumStarFluxZ, - Real const &energyStarFlux, - Real const &magneticStarFluxY, - Real const &magneticStarFluxZ, - Real const &densityStar, - Real const &velocityStarX, - Real const &velocityStarY, - Real const &velocityStarZ, - Real const 
&energyStar, - Real const &magneticStarY, - Real const &magneticStarZ, - Real const &velocityDoubleStarX, - Real const &velocityDoubleStarY, - Real const &velocityDoubleStarZ, - Real const &energyDoubleStar, - Real const &magneticDoubleStarY, - Real const &magneticDoubleStarZ, - Real &momentumDoubleStarFluxX, - Real &momentumDoubleStarFluxY, - Real &momentumDoubleStarFluxZ, - Real &energyDoubleStarFlux, - Real &magneticDoubleStarFluxY, - Real &magneticDoubleStarFluxZ); +/*! + * \brief Compute the double star state. M&K 2005 equations 59-63 + * + * \param starStateL The Left star state + * \param starStateR The Right star state + * \param magneticX The x magnetic field + * \param totalPressureStar The total pressure in the star state + * \param speed The approximate wave speeds + * \return mhd::internal::DoubleStarState The double star state + */ +__device__ __host__ mhd::internal::DoubleStarState computeDoubleStarState(mhd::internal::StarState const &starStateL, + mhd::internal::StarState const &starStateR, + Real const &magneticX, + Real const &totalPressureStar, + mhd::internal::Speeds const &speed); - } // _hlldInternal namespace +/*! + * \brief Compute the double star state fluxes. 
M&K 2005 equation 65 + * + * \param doubleStarState The double star states + * \param starState The star state on the same side + * \param state The non-star state on the same side + * \param flux The non-star flux on the same side + * \param speed The approximate wave speeds + * \param speedSide The wave speed on the same side + * \param speedSideStar The star wave speed on the same side + * \return mhd::internal::Flux The flux in the double star state + */ +__device__ __host__ mhd::internal::Flux computeDoubleStarFluxes( + mhd::internal::DoubleStarState const &doubleStarState, Real const &doubleStarStateEnergy, + mhd::internal::StarState const &starState, mhd::internal::State const &state, mhd::internal::Flux const &flux, + mhd::internal::Speeds const &speed, Real const &speedSide, Real const &speedSideStar); -#endif //CUDA +/*! + * \brief Wrapper around hydro_utilities::Calc_Pressure_Primitive for use in the HLLD solver + * + * \param state The State to compute the gas pressure of + * \param magneticX The X magnetic field + * \param gamma The adiabatic index + * \return Real The gas pressure + */ +inline __host__ __device__ Real Calc_Pressure_Primitive(mhd::internal::State const &state, Real const &magneticX, + Real const &gamma) +{ + return hydro_utilities::Calc_Pressure_Primitive(state.energy, state.density, state.velocityX, state.velocityY, + state.velocityZ, gamma, magneticX, state.magneticY, state.magneticZ); +} +} // namespace internal +} // end namespace mhd diff --git a/src/riemann_solvers/hlld_cuda_tests.cu b/src/riemann_solvers/hlld_cuda_tests.cu index 754c2dba0..7fc96bf0c 100644 --- a/src/riemann_solvers/hlld_cuda_tests.cu +++ b/src/riemann_solvers/hlld_cuda_tests.cu @@ -1,2581 +1,2411 @@ /*! 
-* \file hlld_cuda_tests.cpp -* \author Robert 'Bob' Caddy (rvc@pitt.edu) -* \brief Test the code units within hlld_cuda.cu -* -*/ + * \file hlld_cuda_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Test the code units within hlld_cuda.cu + * + */ // STL Includes +#include #include +#include #include -#include -#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global_cuda.h" +#include "../grid/grid_enum.h" +#include "../riemann_solvers/hlld_cuda.h" // Include code to test #include "../utils/gpu.hpp" -#include "../utils/testing_utilities.h" +#include "../utils/hydro_utilities.h" #include "../utils/mhd_utilities.h" -#include "../riemann_solvers/hlld_cuda.h" // Include code to test - -#if defined(CUDA) && defined(HLLD) - // ========================================================================= - // Integration tests for the entire HLLD solver. Unit tests are below - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test fixture for simple testing of the HLLD Riemann Solver. - Effectively takes the left state, right state, fiducial fluxes, and - custom user output then performs all the required running and testing - * - */ - class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test - { - protected: - // ===================================================================== - /*! - * \brief Compute and return the HLLD fluxes - * - * \param[in] leftState The state on the left side in conserved - * variables. In order the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] rightState The state on the right side in conserved - * variables. 
In order the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] gamma The adiabatic index - * \param[in] direction Which plane the interface is. 0 = plane normal to - * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. - * \return std::vector - */ - std::vector computeFluxes(std::vector stateLeft, - std::vector stateRight, - Real const &gamma, - int const &direction=0) - { - - // Rearrange X, Y, and Z values if a different direction is chosen - // besides default - stateLeft = _cycleXYZ(stateLeft, direction); - stateRight = _cycleXYZ(stateRight, direction); - - // Simulation Paramters - int const nx = 1; // Number of cells in the x-direction? - int const ny = 1; // Number of cells in the y-direction? - int const nz = 1; // Number of cells in the z-direction? - int const nGhost = 0; // Isn't actually used it appears - int nFields = 8; // Total number of conserved fields - #ifdef SCALAR - nFields += NSCALARS; - #endif // SCALAR - #ifdef DE - nFields++; - #endif //DE - - // Launch Parameters - dim3 const dimGrid (1,1,1); // How many blocks in the grid - dim3 const dimBlock(1,1,1); // How many threads per block - - // Create the std::vector to store the fluxes and declare the device - // pointers - std::vector testFlux(nFields); - Real *devConservedLeft; - Real *devConservedRight; - Real *devTestFlux; - - // Allocate device arrays and copy data - CudaSafeCall(cudaMalloc(&devConservedLeft, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devConservedRight, nFields*sizeof(Real))); - CudaSafeCall(cudaMalloc(&devTestFlux, nFields*sizeof(Real))); - - CudaSafeCall(cudaMemcpy(devConservedLeft, - stateLeft.data(), - nFields*sizeof(Real), - cudaMemcpyHostToDevice)); - CudaSafeCall(cudaMemcpy(devConservedRight, - stateRight.data(), - nFields*sizeof(Real), - cudaMemcpyHostToDevice)); - - // Run kernel - hipLaunchKernelGGL(Calculate_HLLD_Fluxes_CUDA, - 
dimGrid, - dimBlock, - 0, - 0, - devConservedLeft, // the "left" interface - devConservedRight, // the "right" interface - devTestFlux, - nx, - ny, - nz, - nGhost, - gamma, - direction, - nFields); - - CudaCheckError(); - CudaSafeCall(cudaMemcpy(testFlux.data(), - devTestFlux, - nFields*sizeof(Real), - cudaMemcpyDeviceToHost)); - - // Make sure to sync with the device so we have the results - cudaDeviceSynchronize(); - CudaCheckError(); - - return testFlux; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Check if the fluxes are correct - * - * \param[in] fiducialFlux The fiducial flux in conserved variables. In - * order the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] scalarFlux The fiducial flux in the passive scalars - * \param[in] thermalEnergyFlux The fiducial flux in the dual energy - * thermal energy - * \param[in] testFlux The test flux in conserved variables. In order the - * elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \param[in] customOutput Any custom output the user would like to - * print. It will print after the default GTest output but before the - * values that failed are printed - * \param[in] direction Which plane the interface is. 0 = plane normal to - * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. 
- */ - void checkResults(std::vector fiducialFlux, - std::vector scalarFlux, - Real thermalEnergyFlux, - std::vector const &testFlux, - std::string const &customOutput = "", - int const &direction=0) - { - // Field names - std::vector fieldNames{"Densities", - "X Momentum", - "Y Momentum", - "Z Momentum", - "Energies", - "X Magnetic Field", - "Y Magnetic Field", - "Z Magnetic Field"}; - #ifdef DE - fieldNames.push_back("Thermal energy (dual energy)"); - fiducialFlux.push_back(thermalEnergyFlux); - #endif //DE - #ifdef SCALAR - std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; - fieldNames.insert(fieldNames.begin()+5, - scalarNames.begin(), - scalarNames.begin() + NSCALARS); - - fiducialFlux.insert(fiducialFlux.begin()+5, - scalarFlux.begin(), - scalarFlux.begin() + NSCALARS); - #endif //SCALAR - - // Rearrange X, Y, and Z values if a different direction is chosen - // besides default - fiducialFlux = _cycleXYZ(fiducialFlux, direction); - - ASSERT_TRUE( (fiducialFlux.size() == testFlux.size()) - and (fiducialFlux.size() == fieldNames.size())) - << "The fiducial flux, test flux, and field name vectors are not all the same length" << std::endl - << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl - << "testFlux.size() = " << testFlux.size() << std::endl - << "fieldNames.size() = " << fieldNames.size() << std::endl; - - // Check for equality - for (size_t i = 0; i < fieldNames.size(); i++) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialFlux[i], - testFlux[i], - absoluteDiff, - ulpsDiff); - EXPECT_TRUE(areEqual) - << std::endl << customOutput << std::endl - << "There's a difference in " << fieldNames[i] << " Flux" << std::endl - << "The direction is: " << direction << " (0=X, 1=Y, 2=Z)" << std::endl - << "The fiducial value is: " << fiducialFlux[i] << std::endl - << "The test value is: " << testFlux[i] << std::endl - << "The 
absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Convert a vector of quantities in primitive variables to - * conserved variables - * - * \param[in] input The state in primitive variables. In order the - * elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - * \return std::vector The state in conserved variables. In order - * the elements are: density, x-momentum, - * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, - * y-magnetic field, z-magnetic field. - */ - std::vector primitive2Conserved(std::vector const &input, - double const &gamma, - std::vector const &primitiveScalars) - { - std::vector output(input.size()); - output.at(0) = input.at(0); // Density - output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum - output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum - output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum - output.at(4) = mhdUtils::computeEnergy(input.at(4), - input.at(0), - input.at(1), - input.at(2), - input.at(3), - input.at(5), - input.at(6), - input.at(7), - gamma); // Pressure to Energy - output.at(5) = input.at(5); // X Magnetic Field - output.at(6) = input.at(6); // Y Magnetic Field - output.at(7) = input.at(7); // Z Magnetic Field - - #ifdef SCALAR - std::vector conservedScalar(primitiveScalars.size()); - std::transform(primitiveScalars.begin(), - primitiveScalars.end(), - conservedScalar.begin(), - [&](Real const &c){ return c*output.at(0); }); - output.insert(output.begin()+5, - conservedScalar.begin(), - conservedScalar.begin() + NSCALARS); - #endif //SCALAR - #ifdef DE - output.push_back(mhdUtils::computeThermalEnergy(output.at(4), - 
output.at(0), - output.at(1), - output.at(2), - output.at(3), - output.at(5 + NSCALARS), - output.at(6 + NSCALARS), - output.at(7 + NSCALARS), - gamma)); - #endif //DE - return output; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief On test start make sure that the number of NSCALARS is allowed - * - */ - void SetUp() - { - #ifdef SCALAR - ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently supported in HLLD tests. NSCALARS = " << NSCALARS; - ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test with passive scalars. NSCALARS = " << NSCALARS; - #endif //SCALAR - } - // ===================================================================== - private: - // ===================================================================== - /*! - * \brief Cyclically permute the vector quantities in the list of - * conserved variables so that the same interfaces and fluxes can be - * used to test the HLLD solver in all 3 directions. - * - * \param[in,out] conservedVec The std::vector of conserved variables to - * be cyclically permutated - * \param[in] direction Which plane the interface is. 0 = plane normal - * to X, 1 = plane normal to Y, 2 = plane normal to Z - * - * \return std::vector The cyclically permutated list of conserved - * variables - */ - std::vector inline _cycleXYZ(std::vector conservedVec, - int const &direction) - { - switch (direction) - { - case 0: // Plane normal to X. 
Default case, do nothing - ; - break; - case 1: // Plane normal to Y - case 2: // Plane normal to Z - // Fall through for both Y and Z normal planes - { - size_t shift = 3 - direction; - auto momentumBegin = conservedVec.begin()+1; - auto magneticBegin = conservedVec.begin()+5; - #ifdef SCALAR - magneticBegin += NSCALARS; - #endif //SCALAR - - std::rotate(momentumBegin, momentumBegin+shift, momentumBegin+3); - std::rotate(magneticBegin, magneticBegin+shift, magneticBegin+3); - } - break; - default: - throw std::invalid_argument(("Invalid Value of `direction`" - " passed to `_cycleXYZ`. Value passed was " - + std::to_string(direction) + ", should be 0, 1, or 2.")); - break; - } - return conservedVec; - } - // ===================================================================== - }; - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using various states and waves from - * the Brio & Wu Shock tube - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - BrioAndWuShockTubeCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 2.; - Real const Vz = 0.0; - Real const Bx = 0.75; - Real const Bz = 0.0; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0 , Bz}, gamma, primitiveScalar), - leftFastRareLeftSide = primitive2Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, gamma, primitiveScalar), - leftFastRareRightSide = primitive2Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, gamma, primitiveScalar), - compoundLeftSide = primitive2Conserved({0.814306, 0.506792, -0.911794, Vz, 
0.706578, Bx, -0.108819, Bz}, gamma, primitiveScalar), - compoundPeak = primitive2Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, primitiveScalar), - compoundRightSide = primitive2Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, primitiveScalar), - slowShockLeftSide = primitive2Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, gamma, primitiveScalar), - rightFastRareLeftSide = primitive2Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, gamma, primitiveScalar), - rightFastRareRightSide = primitive2Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, gamma, primitiveScalar), - rightICs = primitive2Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Brio & Wu state\n" - "Right State: Left Brio & Wu state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0}; - std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Brio & Wu state\n" - "Right State: 
Right Brio & Wu state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; - std::vector const scalarFlux{0, 0, 0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Brio & Wu state\n" - "Right State: Right Brio & Wu state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.20673357746080057, 0.4661897584603672, 0.061170028480309613, 0, 0.064707291981509041, 0.0, 1.0074980455427278, 0}; - std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; - Real thermalEnergyFlux = 0.20673357746080046; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Brio & Wu state\n" - "Right State: Left Brio & Wu state\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.20673357746080057, 0.4661897584603672, 0.061170028480309613, 0, -0.064707291981509041, 0.0, -1.0074980455427278, 0}; - std::vector const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; - Real thermalEnergyFlux = -0.20673357746080046; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - 
- // Cross wave checks - { - std::string const outputString {"Left State: Left of left fast rarefaction\n" - "Right State: Right of left fast rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.4253304970883941, 0.47729308161522394, -0.55321646324583107, 0, 0.92496835095531071, 0.0, 0.53128887284876058, 0}; - std::vector const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; - Real thermalEnergyFlux = 0.41622256825457099; - std::vector const testFluxes = computeFluxes(leftFastRareLeftSide, - leftFastRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left fast rarefaction\n" - "Right State: Left of left fast rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.070492123816403796, 1.2489600267034342, -0.71031457071286608, 0, 0.21008080091470105, 0.0, 0.058615131833681167, 0}; - std::vector const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; - Real thermalEnergyFlux = 0.047345816580591255; - std::vector const testFluxes = computeFluxes(leftFastRareRightSide, - leftFastRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of compound wave\n" - "Right State: Right of compound wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.4470171023231666, 0.60747660800918468, -0.20506357956052623, 0, 0.72655525704800772, 0.0, 0.76278089951123285, 0}; 
- std::vector const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598}; - Real thermalEnergyFlux = 0.38787931087981475; - std::vector const testFluxes = computeFluxes(compoundLeftSide, - compoundRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of compound wave\n" - "Right State: Left of compound wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.38496850292724116, 0.66092864409611585, -0.3473204105316457, 0, 0.89888639514227009, 0.0, 0.71658566275120927, 0}; - std::vector const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; - Real thermalEnergyFlux = 0.28530908823756074; - std::vector const testFluxes = computeFluxes(compoundRightSide, - compoundLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of Compound Wave\n" - "Right State: Peak of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.41864266180405574, 0.63505764056357727, -0.1991008813536404, 0, 0.73707474818824525, 0.0, 0.74058225030218761, 0}; - std::vector const scalarFlux{0.46343639240225803, 0.93299478173931882, 1.388015684704111}; - Real thermalEnergyFlux = 0.36325864563467081; - std::vector const testFluxes = computeFluxes(compoundLeftSide, - compoundPeak, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Peak of Compound Wave\n" - "Right State: Left of Compound Wave\n" - "HLLD State: Left 
Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.39520761138156862, 0.6390998385557225, -0.35132701297727598, 0, 0.89945171879176522, 0.0, 0.71026545717401468, 0}; - std::vector const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; - Real thermalEnergyFlux = 0.32239432669410983; - std::vector const testFluxes = computeFluxes(compoundPeak, - compoundLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Peak of Compound Wave\n" - "Right State: Right of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.4285899590904928, 0.6079309920345296, -0.26055320217638239, 0, 0.75090757444649436, 0.0, 0.85591904930227747, 0}; - std::vector const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; - Real thermalEnergyFlux = 0.34962629086469987; - std::vector const testFluxes = computeFluxes(compoundPeak, - compoundRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of Compound Wave\n" - "Right State: Peak of Compound Wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.39102247793946454, 0.65467021266207581, -0.25227691377588229, 0, 0.76271525822813691, 0.0, 0.83594460438033491, 0}; - std::vector const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; - Real thermalEnergyFlux = 0.28979582956267347; - std::vector const testFluxes = computeFluxes(compoundRightSide, - compoundPeak, 
- gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of contact discontinuity\n" - "Right State: Right of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.40753761783585118, 0.62106392255463172, -0.2455554035355339, 0, 0.73906344777217226, 0.0, 0.8687394222350926, 0}; - std::vector const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; - Real thermalEnergyFlux = 0.30895701155896288; - std::vector const testFluxes = computeFluxes(contactLeftSide, - contactRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of contact discontinuity\n" - "Right State: Left of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.13849588572126192, 0.46025037934770729, 0.18052412687974539, 0, 0.35385590617992224, 0.0, 0.86909622543144227, 0}; - std::vector const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; - Real thermalEnergyFlux = 0.30928031735570188; - std::vector const testFluxes = computeFluxes(contactRightSide, - contactLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Slow shock left side\n" - "Right State: Slow shock right side\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{3.5274134848883865e-05, 
0.32304849716274459, 0.60579784881286636, 0, -0.32813070621836449, 0.0, 0.40636483121437972, 0}; - std::vector const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261}; - Real thermalEnergyFlux = 4.4037784886918126e-05; - std::vector const testFluxes = computeFluxes(slowShockLeftSide, - slowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Slow shock right side\n" - "Right State: Slow shock left side\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.016514307834939734, 0.16452009375678914, 0.71622171077118635, 0, -0.37262428139914472, 0.0, 0.37204015363322052, 0}; - std::vector const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; - Real thermalEnergyFlux = -0.020617189878790236; - std::vector const testFluxes = computeFluxes(slowShockRightSide, - slowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right fast rarefaction left side\n" - "Right State: Right fast rarefaction right side\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.026222824218991747, 0.22254903570732654, 0.68544334213642255, 0, -0.33339172106895454, 0.0, 0.32319665359522443, 0}; - std::vector const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; - Real thermalEnergyFlux = -0.020960370728633469; - std::vector const testFluxes = computeFluxes(rightFastRareLeftSide, - rightFastRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, 
testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right fast rarefaction right side\n" - "Right State: Right fast rarefaction left side\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.001088867226159973, 0.32035322820305906, 0.74922357263343131, 0, -0.0099746892805345766, 0.0, 0.0082135595470345102, 0}; - std::vector const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614}; - Real thermalEnergyFlux = -0.00081785194236053073; - std::vector const testFluxes = computeFluxes(rightFastRareRightSide, - rightFastRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver using various states and waves from - * the Dai & Woodward Shock tube - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - Real const coef = 1. / (std::sqrt(4. * M_PI)); - Real const Bx = 4. 
* coef; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6*coef, 2*coef}, gamma, primitiveScalar), - leftFastShockLeftSide = primitive2Conserved({1.09406, 1.176560, 0.021003, 0.506113, 0.970815, 1.12838, 1.105355, 0.614087}, gamma, primitiveScalar), - leftFastShockRightSide = primitive2Conserved({1.40577, 0.693255, 0.210562, 0.611423, 1.494290, 1.12838, 1.457700, 0.809831}, gamma, primitiveScalar), - leftRotationLeftSide = primitive2Conserved({1.40086, 0.687774, 0.215124, 0.609161, 1.485660, 1.12838, 1.458735, 0.789960}, gamma, primitiveScalar), - leftRotationRightSide = primitive2Conserved({1.40119, 0.687504, 0.330268, 0.334140, 1.486570, 1.12838, 1.588975, 0.475782}, gamma, primitiveScalar), - leftSlowShockLeftSide = primitive2Conserved({1.40519, 0.685492, 0.326265, 0.333664, 1.493710, 1.12838, 1.575785, 0.472390}, gamma, primitiveScalar), - leftSlowShockRightSide = primitive2Conserved({1.66488, 0.578545, 0.050746, 0.250260, 1.984720, 1.12838, 1.344490, 0.402407}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved({1.65220, 0.578296, 0.049683, 0.249962, 1.981250, 1.12838, 1.346155, 0.402868}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved({1.49279, 0.578276, 0.049650, 0.249924, 1.981160, 1.12838, 1.346180, 0.402897}, gamma, primitiveScalar), - rightSlowShockLeftSide = primitive2Conserved({1.48581, 0.573195, 0.035338, 0.245592, 1.956320, 1.12838, 1.370395, 0.410220}, gamma, primitiveScalar), - rightSlowShockRightSide = primitive2Conserved({1.23813, 0.450361, -0.275532, 0.151746, 1.439000, 1.12838, 1.609775, 0.482762}, gamma, primitiveScalar), - rightRotationLeftSide = primitive2Conserved({1.23762, 0.450102, -0.274410, 0.145585, 1.437950, 1.12838, 
1.606945, 0.493879}, gamma, primitiveScalar), - rightRotationRightSide = primitive2Conserved({1.23747, 0.449993, -0.180766, -0.090238, 1.437350, 1.12838, 1.503855, 0.752090}, gamma, primitiveScalar), - rightFastShockLeftSide = primitive2Conserved({1.22305, 0.424403, -0.171402, -0.085701, 1.409660, 1.12838, 1.447730, 0.723864}, gamma, primitiveScalar), - rightFastShockRightSide = primitive2Conserved({1.00006, 0.000121, -0.000057, -0.000028, 1.000100, 1.12838, 1.128435, 0.564217}, gamma, primitiveScalar), - rightICs = primitive2Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4*coef, 2*coef}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Dai & Woodward state\n" - "Right State: Left Dai & Woodward state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 1.0381971863420549, -1.1459155902616465, -0.63661977236758127, 0, 0.0, 0, -1.1102230246251565e-16}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Dai & Woodward state\n" - "Right State: Right Dai & Woodward state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.35915494309189522, -1.2732395447351625, -0.63661977236758127, -0.63661977236758172, 0.0, 2.2204460492503131e-16, -1.1283791670955123}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, 
scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Dai & Woodward state\n" - "Right State: Right Dai & Woodward state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.17354924587196074, 0.71614983677687327, -1.1940929411768009, -1.1194725181819352, -0.11432087006939984, 0.0, 0.056156000248263505, -0.42800560867873094}; - std::vector const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; - Real thermalEnergyFlux = 0.24104061926661174; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Dai & Woodward state\n" - "Right State: Left Dai & Woodward state\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.17354924587196074, 0.71614983677687327, -1.1940929411768009, -0.14549552299758384, -0.47242308031148195, 0.0, -0.056156000248263505, -0.55262526758377528}; - std::vector const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; - Real thermalEnergyFlux = -0.24104061926661174; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Cross wave checks - { - std::string const outputString {"Left State: Left of left fast shock\n" - "Right State: Right of left fast shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.96813688187727132, 
3.0871217875403394, -1.4687093290523414, -0.33726008721080036, 4.2986213406773457, 0.0, 0.84684181393860269, -0.087452560407274671}; - std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045}; - Real thermalEnergyFlux = 1.2886155333980993; - std::vector const testFluxes = computeFluxes(leftFastShockLeftSide, - leftFastShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left fast shock\n" - "Right State: Left of left fast shock\n" - "HLLD State: Left Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1.3053938862274184, 2.4685129176021858, -1.181892850065283, -0.011160487372167127, 5.1797404608257249, 0.0, 1.1889903073770265, 0.10262704114294516}; - std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666}; - Real thermalEnergyFlux = 2.081389946702628; - std::vector const testFluxes = computeFluxes(leftFastShockRightSide, - leftFastShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of left rotation/Alfven wave\n" - "Right State: Right of left rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.96326128304298586, 2.8879592118317445, -1.4808188010794987, -0.20403672861184916, 4.014027751838869, 0.0, 0.7248753989305099, -0.059178137562467162}; - std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724}; - Real thermalEnergyFlux = 1.5323573637968553; - std::vector const testFluxes = computeFluxes(leftRotationLeftSide, - leftRotationRightSide, - gamma, - 
direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left rotation/Alfven wave\n" - "Right State: Left of left rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.96353754504060063, 2.8875487093397085, -1.4327309336053695, -0.31541343522923493, 3.9739842521208342, 0.0, 0.75541746728406312, -0.13479771672887678}; - std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313}; - Real thermalEnergyFlux = 1.5333744977458499; - std::vector const testFluxes = computeFluxes(leftRotationRightSide, - leftRotationLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of left slow shock\n" - "Right State: Right of left slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.88716095730727451, 2.9828594399125663, -1.417062582518549, -0.21524331343191233, 3.863474778369334, 0.0, 0.71242370728996041, -0.05229712416644372}; - std::vector const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; - Real thermalEnergyFlux = 1.4145715457049737; - std::vector const testFluxes = computeFluxes(leftSlowShockLeftSide, - leftSlowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of left slow shock\n" - "Right State: Left of left slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, 
vec(B) - std::vector const fiducialFlux{1.042385440439527, 2.7732383399777376, -1.5199872074603551, -0.21019362664841068, 4.1322001036232585, 0.0, 0.72170937317481543, -0.049474715634396704}; - std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879}; - Real thermalEnergyFlux = 1.8639570701934713; - std::vector const testFluxes = computeFluxes(leftSlowShockRightSide, - leftSlowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of contact discontinuity\n" - "Right State: Right of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.95545795601418737, 2.8843900822429749, -1.4715039715239722, -0.21575736014726318, 4.0078718055059257, 0.0, 0.72241353110189066, -0.049073560388753337}; - std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892}; - Real thermalEnergyFlux = 1.7186185770667382; - std::vector const testFluxes = computeFluxes(contactLeftSide, - contactRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of contact discontinuity\n" - "Right State: Left of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.86324813554422819, 2.8309913324581251, -1.4761428591480787, -0.23887765947428419, 3.9892942559102793, 0.0, 0.72244123046603836, -0.049025527032060034}; - std::vector const scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; - Real thermalEnergyFlux = 1.7184928987481187; - std::vector const testFluxes = 
computeFluxes(contactRightSide, - contactLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of right slow shock\n" - "Right State: Right of right slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.81125524370350677, 2.901639500435365, -1.5141545346789429, -0.262600896007809, 3.8479660419540087, 0.0, 0.7218977970017596, -0.049091614519593846}; - std::vector const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; - Real thermalEnergyFlux = 1.6022319728249694; - std::vector const testFluxes = computeFluxes(rightSlowShockLeftSide, - rightSlowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of right slow shock\n" - "Right State: Left of right slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.60157947557836688, 2.3888357198399746, -1.9910500022202977, -0.45610948442354332, 3.5359430988850069, 0.0, 1.0670963294022622, 0.05554893654378229}; - std::vector const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885}; - Real thermalEnergyFlux = 1.0487665253534804; - std::vector const testFluxes = computeFluxes(rightSlowShockRightSide, - rightSlowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of right rotation/Alfven wave\n" - "Right State: Right of right rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the 
fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.55701691287884714, 2.4652223621237814, -1.9664615862227277, -0.47490477894092042, 3.3900659850690529, 0.0, 1.0325648885587542, 0.059165409025635551}; - std::vector const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; - Real thermalEnergyFlux = 0.9707694646266285; - std::vector const testFluxes = computeFluxes(rightRotationLeftSide, - rightRotationRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of right rotation/Alfven wave\n" - "Right State: Left of right rotation/Alfven wave\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.55689116371132596, 2.4648517303940851, -1.7972202655166787, -0.90018282739798461, 3.3401033852664566, 0.0, 0.88105841856465605, 0.43911718823267476}; - std::vector const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; - Real thermalEnergyFlux = 0.9702629326292449; - std::vector const testFluxes = computeFluxes(rightRotationRightSide, - rightRotationLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of right fast shock\n" - "Right State: Right of right fast shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.48777637414577313, 2.3709438477809708, -1.7282900552525988, -0.86414423547773778, 2.8885015704245069, 0.0, 0.77133731061645838, 0.38566794697432505}; - std::vector const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; - 
Real thermalEnergyFlux = 0.84330016382608752; - std::vector const testFluxes = computeFluxes(rightFastShockLeftSide, - rightFastShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of right fast shock\n" - "Right State: Left of right fast shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.040639426423817904, 1.0717156491947966, -1.2612066401572222, -0.63060225433149875, 0.15803727234007203, 0.0, 0.042555541396817498, 0.021277678888288909}; - std::vector const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; - Real thermalEnergyFlux = 0.060961577855018087; - std::vector const testFluxes = computeFluxes(rightFastShockRightSide, - rightFastShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the HLLD Riemann Solver using various states and waves from - * the Ryu & Jones 4d Shock tube - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - Real const Bx = 0.7; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar), - hydroRareLeftSide = primitive2Conserved({0.990414, 0.012415, 1.458910e-58, 6.294360e-59, 0.984076, Bx, 1.252355e-57, 5.366795e-58}, gamma, primitiveScalar), - hydroRareRightSide = primitive2Conserved({0.939477, 0.079800, 1.557120e-41, 7.505190e-42, 0.901182, Bx, 1.823624e-40, 8.712177e-41}, gamma, primitiveScalar), - switchOnSlowShockLeftSide = primitive2Conserved({0.939863, 0.079142, 1.415730e-02, 7.134030e-03, 0.901820, Bx, 2.519650e-02, 1.290082e-02}, gamma, primitiveScalar), - switchOnSlowShockRightSide = primitive2Conserved({0.651753, 0.322362, 8.070540e-01, 4.425110e-01, 0.490103, Bx, 6.598380e-01, 3.618000e-01}, gamma, primitiveScalar), - contactLeftSide = primitive2Conserved({0.648553, 0.322525, 8.072970e-01, 4.426950e-01, 0.489951, Bx, 6.599295e-01, 3.618910e-01}, gamma, primitiveScalar), - contactRightSide = primitive2Conserved({0.489933, 0.322518, 8.073090e-01, 4.426960e-01, 0.489980, Bx, 6.599195e-01, 3.618850e-01}, gamma, primitiveScalar), - slowShockLeftSide = primitive2Conserved({0.496478, 0.308418, 8.060830e-01, 4.420150e-01, 0.489823, Bx, 6.686695e-01, 3.666915e-01}, gamma, primitiveScalar), - slowShockRightSide = primitive2Conserved({0.298260, -0.016740, 2.372870e-01, 1.287780e-01, 0.198864, Bx, 8.662095e-01, 4.757390e-01}, gamma, primitiveScalar), - rotationLeftSide = primitive2Conserved({0.298001, 
-0.017358, 2.364790e-01, 1.278540e-01, 0.198448, Bx, 8.669425e-01, 4.750845e-01}, gamma, primitiveScalar), - rotationRightSide = primitive2Conserved({0.297673, -0.018657, 1.059540e-02, 9.996860e-01, 0.197421, Bx, 9.891580e-01, 1.024949e-04}, gamma, primitiveScalar), - fastRareLeftSide = primitive2Conserved({0.297504, -0.020018, 1.137420e-02, 1.000000e+00, 0.197234, Bx, 9.883860e-01, - 4.981931e-17}, gamma, primitiveScalar), - fastRareRightSide = primitive2Conserved({0.299996, -0.000033, 1.855120e-05, 1.000000e+00, 0.199995, Bx, 9.999865e-01, 1.737190e-16}, gamma, primitiveScalar), - rightICs = primitive2Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Ryu & Jones 4d state\n" - "Right State: Left Ryu & Jones 4d state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Ryu & Jones 4d state\n" - "Right State: Right Ryu & Jones 4d state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-5.5511151231257827e-17, 0.45500000000000013, -0.69999999999999996, -5.5511151231257827e-17, 0, 0.0, 0, -0.69999999999999996}; - std::vector const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes 
= computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Ryu & Jones 4d state\n" - "Right State: Right Ryu & Jones 4d state\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.092428729855986602, 0.53311593977445149, -0.39622049648437296, -0.21566989083797167, -0.13287876964320211, 0.0, -0.40407579574102892, -0.21994567048141428}; - std::vector const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; - Real thermalEnergyFlux = 0.13864309478397996; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Ryu & Jones 4d state\n" - "Right State: Left Ryu & Jones 4d state\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.092428729855986602, 0.53311593977445149, -0.39622049648437296, 0.21566989083797167, 0.13287876964320211, 0.0, 0.40407579574102892, -0.21994567048141428}; - std::vector const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; - Real thermalEnergyFlux = -0.13864309478397996; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Cross wave checks - { - std::string const outputString {"Left State: Left side of pure hydrodynamic rarefaction\n" - "Right State: Right side of pure hydrodynamic rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes 
and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.074035256375659553, 0.66054553664209648, -6.1597070943493028e-41, -2.9447391900433873e-41, 0.1776649658235645, 0.0, -6.3466063324344113e-41, -3.0340891384335242e-41}; - std::vector const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; - Real thermalEnergyFlux = 0.11034221894046368; - std::vector const testFluxes = computeFluxes(hydroRareLeftSide, - hydroRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right side of pure hydrodynamic rarefaction\n" - "Right State: Left side of pure hydrodynamic rarefaction\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.013336890338886076, 0.74071279157971992, -6.1745213352160876e-41, -2.9474651270630147e-41, 0.033152482405470307, 0.0, 6.2022392844946449e-41, 2.9606965476795895e-41}; - std::vector const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; - Real thermalEnergyFlux = 0.019189877201961154; - std::vector const testFluxes = computeFluxes(hydroRareRightSide, - hydroRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of switch on slow shock\n" - "Right State: Right of switch on slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.19734622040826083, 0.47855039640569758, -0.3392293209655618, -0.18588204716255491, 0.10695446263054809, 0.0, -0.3558357543098733, -0.19525093130352045}; - std::vector const 
scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; - Real thermalEnergyFlux = 0.2840373040888583; - std::vector const testFluxes = computeFluxes(switchOnSlowShockLeftSide, - switchOnSlowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of switch on slow shock\n" - "Right State: Left of switch on slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.097593254768855386, 0.76483698872352757, -0.02036438492698419, -0.010747481940703562, 0.25327551496496836, 0.0, -0.002520109973016129, -0.00088262199017708799}; - std::vector const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; - Real thermalEnergyFlux = 0.1100817647375162; - std::vector const testFluxes = computeFluxes(switchOnSlowShockRightSide, - switchOnSlowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of contact discontinuity\n" - "Right State: Right of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.2091677440314007, 0.5956612619664029, -0.29309091669513981, -0.16072556008504282, 0.19220050968424285, 0.0, -0.35226977371803297, -0.19316940226499904}; - std::vector const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; - Real thermalEnergyFlux = 0.23702444986592192; - std::vector const testFluxes = computeFluxes(contactLeftSide, - contactRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } 
- { - std::string const outputString {"Left State: Right of contact discontinuity\n" - "Right State: Left of contact discontinuity\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.15801775068597168, 0.57916072367837657, -0.33437339604094024, -0.18336617461176744, 0.16789791355547545, 0.0, -0.3522739911439669, -0.19317084712861482}; - std::vector const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; - Real thermalEnergyFlux = 0.23704936434506069; - std::vector const testFluxes = computeFluxes(contactRightSide, - contactLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of slow shock\n" - "Right State: Right of slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.11744487326715558, 0.66868230621718128, -0.35832022960458892, -0.19650694834641164, 0.057880816021092185, 0.0, -0.37198011453582402, -0.20397277844271294}; - std::vector const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; - Real thermalEnergyFlux = 0.1738058891582446; - std::vector const testFluxes = computeFluxes(slowShockLeftSide, - slowShockRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of slow shock\n" - "Right State: Left of slow shock\n" - "HLLD State: Left Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.038440990187426027, 0.33776683678923869, -0.62583241538732792, -0.3437911783906169, -0.13471828103488348, 
0.0, -0.15165427985881363, -0.082233932588833825}; - std::vector const scalarFlux{0.042554081172858457, 0.085670301959209896, 0.12745164834795927}; - Real thermalEnergyFlux = 0.038445630017261548; - std::vector const testFluxes = computeFluxes(slowShockRightSide, - slowShockLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of rotation/Alfven wave\n" - "Right State: Right of rotation/Alfven wave\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, -0.33352435102145184, -0.21197843894720192, 0.0, -0.18030635192654354, -0.098381113757603278}; - std::vector const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; - Real thermalEnergyFlux = -0.0052395622905745485; - std::vector const testFluxes = computeFluxes(rotationLeftSide, - rotationRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of rotation/Alfven wave\n" - "Right State: Left of rotation/Alfven wave\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.005459628948343731, 0.4415038084184626, -0.69273580053867279, -0.0051834737482743809, -0.037389286119015486, 0.0, -0.026148289294373184, -0.69914753968916865}; - std::vector const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; - Real thermalEnergyFlux = -0.0054536013916442853; - std::vector const testFluxes = computeFluxes(rotationRightSide, - rotationLeftSide, - gamma, - direction); - checkResults(fiducialFlux, 
scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left of fast rarefaction\n" - "Right State: Right of fast rarefaction\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, -0.0059354802028144804, -0.040194357552219451, 0.0, -0.027710302430178135, -0.70000000000000007}; - std::vector const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; - Real thermalEnergyFlux = -0.0059354109546219782; - std::vector const testFluxes = computeFluxes(fastRareLeftSide, - fastRareRightSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right of fast rarefaction\n" - "Right State: Left of fast rarefaction\n" - "HLLD State: Right Double Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-3.0171858819483255e-05, 0.45503057873272706, -0.69998654276213712, -3.0171858819427744e-05, -0.00014827469339251387, 0.0, -8.2898844654399895e-05, -0.69999999999999984}; - std::vector const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373}; - Real thermalEnergyFlux = -3.000421709818028e-05; - std::vector const testFluxes = computeFluxes(fastRareRightSide, - fastRareLeftSide, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the HLLD Riemann Solver using various states and waves from - * the Einfeldt Strong Rarefaction (EFR) - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - Real const V0 = 2.; - Real const Vy = 0.0; - Real const Vz = 0.0; - Real const Bx = 0.0; - Real const Bz = 0.0; - - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // States - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - leftICs = primitive2Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar), - leftRarefactionCenter = primitive2Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), - leftVxTurnOver = primitive2Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), - midPoint = primitive2Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), - rightVxTurnOver = primitive2Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), - rightRarefactionCenter = primitive2Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar), - rightICs = primitive2Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar); - - for (size_t direction = 0; direction < 3; direction++) - { - // Initial Condition Checks - { - std::string const outputString {"Left State: Left Einfeldt Strong Rarefaction state\n" - "Right State: Left Einfeldt Strong Rarefaction state\n" - "HLLD State: Right"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; - std::vector const scalarFlux{-2.2139950592000002, 
-4.4572370036000004, -6.6310283749999996}; - Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = computeFluxes(leftICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Einfeldt Strong Rarefaction state\n" - "Right State: Right Einfeldt Strong Rarefaction state\n" - "HLLD State: Left"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; - std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; - Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = computeFluxes(rightICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Einfeldt Strong Rarefaction state\n" - "Right State: Right Einfeldt Strong Rarefaction state\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; - std::vector const scalarFlux{0,0,0}; - Real thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(leftICs, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Einfeldt Strong Rarefaction state\n" - "Right State: Left Einfeldt Strong Rarefaction state\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; - std::vector const scalarFlux{0,0,0}; - Real 
thermalEnergyFlux = 0.0; - std::vector const testFluxes = computeFluxes(rightICs, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - - // Intermediate state checks - { - std::string const outputString {"Left State: Left Einfeldt Strong Rarefaction state\n" - "Right State: Left rarefaction center\n" - "HLLD State: Right"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0}; - std::vector const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; - Real thermalEnergyFlux = -0.19705631998499995; - std::vector const testFluxes = computeFluxes(leftICs, - leftRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left rarefaction center\n" - "Right State: Left Einfeldt Strong Rarefaction state\n" - "HLLD State: Right"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; - std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; - Real thermalEnergyFlux = -1.3499999999999996; - std::vector const testFluxes = computeFluxes(leftRarefactionCenter, - leftICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left rarefaction center\n" - "Right State: Left Vx turnover point\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.023176056428381629, 
-2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0}; - std::vector const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; - Real thermalEnergyFlux = -0.0052127803322822184; - std::vector const testFluxes = computeFluxes(leftRarefactionCenter, - leftVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Vx turnover point\n" - "Right State: Left rarefaction center\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0}; - std::vector const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636}; - Real thermalEnergyFlux = -0.19746407621898149; - std::vector const testFluxes = computeFluxes(leftVxTurnOver, - leftRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Left Vx turnover point\n" - "Right State: Midpoint\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0}; - std::vector const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054}; - Real thermalEnergyFlux = -0.00034184143405415065; - std::vector const testFluxes = computeFluxes(leftVxTurnOver, - midPoint, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Midpoint\n" - "Right 
State: Left Vx turnover point\n" - "HLLD State: Right Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0}; - std::vector const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; - Real thermalEnergyFlux = -0.001531664896602873; - std::vector const testFluxes = computeFluxes(midPoint, - leftVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Midpoint\n" - "Right State: Right Vx turnover point\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0}; - std::vector const scalarFlux{ 0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; - Real thermalEnergyFlux = 0.00040916715364737997; - std::vector const testFluxes = computeFluxes(midPoint, - rightVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Vx turnover point\n" - "Right State: Midpoint\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0}; - std::vector const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; - Real thermalEnergyFlux = 0.0020542406295284269; - std::vector const testFluxes = computeFluxes(rightVxTurnOver, - midPoint, - gamma, - direction); - 
checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Vx turnover point\n" - "Right State: Right rarefaction center\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0}; - std::vector const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; - Real thermalEnergyFlux = 0.0053191138878843835; - std::vector const testFluxes = computeFluxes(rightVxTurnOver, - rightRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right rarefaction center\n" - "Right State: Right Vx turnover point\n" - "HLLD State: Left Star"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0}; - std::vector const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; - Real thermalEnergyFlux = 0.1444638874418068; - std::vector const testFluxes = computeFluxes(rightRarefactionCenter, - rightVxTurnOver, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right rarefaction center\n" - "Right State: Right Einfeldt Strong Rarefaction state\n" - "HLLD State: Left"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 0}; - std::vector const 
scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; - Real thermalEnergyFlux = 0.14472930749999999; - std::vector const testFluxes = computeFluxes(rightRarefactionCenter, - rightICs, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Right Einfeldt Strong Rarefaction state\n" - "Right State: Right rarefaction center\n" - "HLLD State: Left"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; - std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; - Real thermalEnergyFlux = 1.3499999999999996; - std::vector const testFluxes = computeFluxes(rightICs, - rightRarefactionCenter, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the HLLD Riemann Solver with the degenerate state - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - DegenerateStateCorrectInputExpectCorrectOutput) - { - // Constant Values - Real const gamma = 5./3.; - std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; - - // State - std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | Pressure | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - state = primitive2Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar); - - std::vector const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; - Real thermalEnergyFlux = 1.5; - std::string const outputString {"Left State: Degenerate state\n" - "Right State: Degenerate state\n" - "HLLD State: Left Double Star State"}; - - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - // If you run into issues with the energy try 0.001953125 instead. - // That's what I got when running the Athena solver on its own. Running - // the Athena solver with theses tests gave me -0.00080700946455175148 - // though - for (size_t direction = 0; direction < 3; direction++) - { - std::vector const testFluxes = computeFluxes(state, - state, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the HLLD Riemann Solver with all zeroes - * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - AllZeroesExpectAllZeroes) - { - // Constant Values - Real const gamma = 5./3.; - - // State - size_t numElements = 8; - #ifdef SCALAR - numElements += 3; - #endif // SCALAR - - std::vector const state(numElements, 0.0); - std::vector const fiducialFlux(8,0.0); - std::vector const scalarFlux(3,0.0); - Real thermalEnergyFlux = 0.0; - - std::string const outputString {"Left State: All zeroes\n" - "Right State: All zeroes\n" - "HLLD State: Right Star State"}; - - for (size_t direction = 0; direction < 3; direction++) - { - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const testFluxes = computeFluxes(state, - state, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the HLLD Riemann Solver with negative pressure, energy, and - density. 
- * - */ - TEST_F(tMHDCalculateHLLDFluxesCUDA, - UnphysicalValuesExpectAutomaticFix) - { - // Constant Values - Real const gamma = 5./3.; - - // States - std::vector // | Density | X-Momentum | Y-Momentum | Z-Momentum | Energy | X-Magnetic Field | Y-Magnetic Field | Z-Magnetic Field | Adiabatic Index | Passive Scalars | - negativePressure = { 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0}, - negativeEnergy = { 1.0, 1.0, 1.0, 1.0, -(5-gamma), 1.0, 1.0, 1.0}, - negativeDensity = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, - negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, -gamma, 1.0, 1.0, 1.0}, - negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; - - #ifdef SCALAR - std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; - negativePressure.insert(negativePressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeEnergy.insert(negativeEnergy.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensity.insert(negativeDensity.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - negativeDensityPressure.insert(negativeDensityPressure.begin()+5, conservedScalar.begin(), conservedScalar.begin() + NSCALARS); - #endif // SCALAR - #ifdef DE - negativePressure.push_back(mhdUtils::computeThermalEnergy(negativePressure.at(4),negativePressure.at(0),negativePressure.at(1),negativePressure.at(2),negativePressure.at(3),negativePressure.at(5 + NSCALARS),negativePressure.at(6 + NSCALARS),negativePressure.at(7 + NSCALARS),gamma)); - negativeEnergy.push_back(mhdUtils::computeThermalEnergy(negativeEnergy.at(4),negativeEnergy.at(0),negativeEnergy.at(1),negativeEnergy.at(2),negativeEnergy.at(3),negativeEnergy.at(5 + NSCALARS),negativeEnergy.at(6 + NSCALARS),negativeEnergy.at(7 + NSCALARS),gamma)); - 
negativeDensity.push_back(mhdUtils::computeThermalEnergy(negativeDensity.at(4),negativeDensity.at(0),negativeDensity.at(1),negativeDensity.at(2),negativeDensity.at(3),negativeDensity.at(5 + NSCALARS),negativeDensity.at(6 + NSCALARS),negativeDensity.at(7 + NSCALARS),gamma)); - negativeDensityEnergyPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityEnergyPressure.at(4),negativeDensityEnergyPressure.at(0),negativeDensityEnergyPressure.at(1),negativeDensityEnergyPressure.at(2),negativeDensityEnergyPressure.at(3),negativeDensityEnergyPressure.at(5 + NSCALARS),negativeDensityEnergyPressure.at(6 + NSCALARS),negativeDensityEnergyPressure.at(7 + NSCALARS),gamma)); - negativeDensityPressure.push_back(mhdUtils::computeThermalEnergy(negativeDensityPressure.at(4),negativeDensityPressure.at(0),negativeDensityPressure.at(1),negativeDensityPressure.at(2),negativeDensityPressure.at(3),negativeDensityPressure.at(5 + NSCALARS),negativeDensityPressure.at(6 + NSCALARS),negativeDensityPressure.at(7 + NSCALARS),gamma)); - #endif //DE - - for (size_t direction = 0; direction < 3; direction++) - { - { - std::string const outputString {"Left State: Negative Pressure\n" - "Right State: Negative Pressure\n" - "HLLD State: Left Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; - Real thermalEnergyFlux = -1.5; - std::vector const testFluxes = computeFluxes(negativePressure, - negativePressure, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Energy\n" - "Right State: Negative Energy\n" - "HLLD State: Left Star State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - 
std::vector const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; - Real thermalEnergyFlux = -6.333333333333333; - std::vector const testFluxes = computeFluxes(negativeEnergy, - negativeEnergy, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Density\n" - "Right State: Negative Density\n" - "HLLD State: Left State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; - Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensity, - negativeDensity, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Density, Energy, and Pressure\n" - "Right State: Negative Density, Energy, and Pressure\n" - "HLLD State: Right State"}; - // Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, 1.5E+20, 0, 0, 0}; - std::vector const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20}; - Real thermalEnergyFlux = 1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensityEnergyPressure, - negativeDensityEnergyPressure, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - { - std::string const outputString {"Left State: Negative Density and Pressure\n" - "Right State: Negative Density and Pressure\n" - "HLLD State: Left State"}; - // 
Compute the fluxes and check for correctness - // Order of Fluxes is rho, vec(V), E, vec(B) - std::vector const fiducialFlux{1, 1e+20, 1e+20, 1e+20, -1.5e+20, 0, 0, 0}; - std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; - Real thermalEnergyFlux = -1.5000000000000001e+40; - std::vector const testFluxes = computeFluxes(negativeDensityPressure, - negativeDensityPressure, - gamma, - direction); - checkResults(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); - } - } - } - // ========================================================================= - - // ========================================================================= - // End of integration tests for the entire HLLD solver. Unit tests are below - // ========================================================================= - - // ========================================================================= - // Unit tests for the contents of the _hlldInternal namespace - // ========================================================================= - /*! 
- * \brief A struct to hold some basic test values - * - */ - namespace - { - struct testParams - { - // List of cases - std::vector names{"Case 1", "Case 2"}; - - // Conserved Variables - double gamma = 5./3.; - std::valarray densityL {21.50306776645775 , 48.316634031589935}; - std::valarray densityR {81.1217731762265 , 91.02955738853635}; - std::valarray momentumXL{38.504606872151484 , 18.984145880030045}; - std::valarray momentumXR{ 8.201811315045326 , 85.24863367778745}; - std::valarray momentumYL{ 7.1046427940455015, 33.76182584816693}; - std::valarray momentumYR{13.874767484202021 , 33.023492551299974}; - std::valarray momentumZL{32.25700338919422 , 89.52561861038686}; - std::valarray momentumZR{33.85305318830181 , 8.664313303796256}; - std::valarray energyL {65.75120838109942 , 38.461354599479826}; - std::valarray energyR {18.88982523270516 , 83.65639784178894}; - std::valarray magneticXL{92.75101068883114 , 31.588767769990532}; - std::valarray magneticXR{93.66196246448985 , 84.3529879134052}; - std::valarray magneticYL{12.297499156516622 , 63.74471969570406}; - std::valarray magneticYR{84.9919141787549 , 35.910258841630984}; - std::valarray magneticZL{46.224045698787776 , 37.70326455170754}; - std::valarray magneticZR{34.852095153095384 , 24.052685003977757}; - // Star States - std::valarray densityStarL {28.520995251761526 , 54.721668215064945}; - std::valarray densityStarR {49.09069570738605 , 72.68000504460609}; - std::valarray momentumStarXL{48.96082367518151 , 97.15439466280228}; - std::valarray momentumStarXR{65.74705433463932 , 94.5689655974538}; - std::valarray momentumStarYL{44.910034185328996 , 78.60179936059853}; - std::valarray momentumStarYR{51.642522487399276 , 44.63864007208728}; - std::valarray momentumStarZL{39.78163555990428 , 63.01612978428839}; - std::valarray momentumStarZR{33.47900698769427 , 52.19410653341197}; - std::valarray energyStarL { 6.579867455284738 , 30.45043664908369}; - std::valarray energyStarR {90.44484278669114 , 
61.33664731346812}; - std::valarray magneticStarXL{49.81491527582234 , 62.379765828560906}; - std::valarray magneticStarXR{67.77402751903804 , 64.62226739788758}; - std::valarray magneticStarYL{62.09348829143065 , 54.27916744403672}; - std::valarray magneticStarYR{26.835645069149873 , 98.97444628327318}; - std::valarray magneticStarZL{62.765890944643196 , 93.26765455509641}; - std::valarray magneticStarZR{ 7.430231695917344 , 10.696380763901459}; - // Double Star State - std::valarray momentumDoubleStarXL{75.42525315887075 , 83.87480678359029}; - std::valarray momentumDoubleStarYL{22.56132540660678 , 76.11074421934487}; - std::valarray momentumDoubleStarZL{27.83908778933224 , 28.577101567661465}; - std::valarray energyDoubleStar {45.83202455707669 , 55.4553014145573}; - std::valarray magneticDoubleStarY {20.943239839455895 , 83.8514810487021}; - std::valarray magneticDoubleStarZ {83.3802438268807 , 80.36671251730783}; - // Fluxes - std::valarray densityFluxL {12.939239309626116 , 81.71524586517073}; - std::valarray momentumFluxXL {65.05481464917627 , 56.09885069707803}; - std::valarray momentumFluxYL {73.67692845586782 , 2.717246983403787}; - std::valarray momentumFluxZL {16.873647595664387 , 39.70132983192873}; - std::valarray energyFluxL {52.71888731972469 , 81.63926176158796}; - std::valarray magneticFluxXL {67.7412464028116 , 42.85301340921149}; - std::valarray magneticFluxYL {58.98928445415967 , 57.04344459221359}; - std::valarray magneticFluxZL {29.976925743532302 , 97.73329827141359}; - std::valarray momentumStarFluxX{74.90125547448865 , 26.812722601652684}; - std::valarray momentumStarFluxY{16.989138610622945 , 48.349566649914976}; - std::valarray momentumStarFluxZ{38.541822734846185 , 61.22843961052538}; - std::valarray energyStarFlux {19.095105176247017 , 45.43224973313112}; - std::valarray magneticStarFluxY{96.23964526624277 , 33.05337536594796}; - std::valarray magneticStarFluxZ{86.22516928268347 , 15.62102082410738}; - - // Derived/Primitive variables 
- std::valarray velocityXL = momentumXL / densityL; - std::valarray velocityXR = momentumXR / densityR; - std::valarray velocityYL = momentumYL / densityL; - std::valarray velocityYR = momentumYR / densityR; - std::valarray velocityZL = momentumZL / densityL; - std::valarray velocityZR = momentumZR / densityR; - std::valarray totalPressureStarL{66.80958736783934 , 72.29644038317676}; - std::vector gasPressureL; - std::vector gasPressureR; - std::vector totalPressureL; - std::vector totalPressureR; - // Star State - std::valarray velocityStarXL = momentumStarXL / densityStarL; - std::valarray velocityStarXR = momentumStarXR / densityStarR; - std::valarray velocityStarYL = momentumStarYL / densityStarL; - std::valarray velocityStarYR = momentumStarYR / densityStarR; - std::valarray velocityStarZL = momentumStarZL / densityStarL; - std::valarray velocityStarZR = momentumStarZR / densityStarR; - // Double Star State - std::valarray velocityDoubleStarXL = momentumDoubleStarXL / densityStarL; - std::valarray velocityDoubleStarYL = momentumDoubleStarYL / densityStarL; - std::valarray velocityDoubleStarZL = momentumDoubleStarZL / densityStarL; - // Other - std::valarray speedM {68.68021569453585 , 70.08236749169825}; - std::valarray speedSide {70.37512772923496 , 3.6579130085113265}; - testParams() - { - for (size_t i = 0; i < names.size(); i++) - { - gasPressureL.push_back(mhdUtils::computeGasPressure(energyL[i], densityL[i], momentumXL[i], momentumYL[i], momentumZL[i], magneticXL[i], magneticYL[i], magneticZL[i], gamma)); - gasPressureR.push_back(mhdUtils::computeGasPressure(energyR[i], densityR[i], momentumXR[i], momentumYR[i], momentumZR[i], magneticXR[i], magneticYR[i], magneticZR[i], gamma)); - totalPressureL.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXL[i], magneticYL[i], magneticZL[i])); - totalPressureR.push_back(mhdUtils::computeTotalPressure(gasPressureL.back(), magneticXR[i], magneticYR[i], magneticZR[i])); - } - } - }; - } - // 
========================================================================= - - // ========================================================================= - /*! - * \brief Test the _hlldInternal::_approximateWaveSpeeds function - * - */ - TEST(tMHDHlldInternalApproximateWaveSpeeds, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - std::vector const fiducialSpeedL {-22.40376497145191, -11.190385012513822}; - std::vector const fiducialSpeedR {24.295526347371595, 12.519790189404299}; - std::vector const fiducialSpeedM {-0.81760587897407833, -0.026643804611559244}; - std::vector const fiducialSpeedStarL {-19.710500632936679, -4.4880642018724357}; - std::vector const fiducialSpeedStarR {9.777062240423124, 9.17474383484066}; - std::vector const fiducialDensityStarL{24.101290139122913, 50.132466596958501}; - std::vector const fiducialDensityStarR{78.154104734671265, 84.041595114910123}; - - double testSpeedL = 0; - double testSpeedR = 0; - double testSpeedM = 0; - double testSpeedStarL = 0; - double testSpeedStarR = 0; - double testDensityStarL = 0; - double testDensityStarR = 0; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_approximateWaveSpeeds(parameters.densityL[i], - parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.gasPressureL[i], - parameters.totalPressureL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - parameters.densityR[i], - parameters.momentumXR[i], - parameters.momentumYR[i], - parameters.momentumZR[i], - parameters.velocityXR[i], - parameters.velocityYR[i], - parameters.velocityZR[i], - parameters.gasPressureR[i], - parameters.totalPressureR[i], - parameters.magneticXR[i], - parameters.magneticYR[i], - parameters.magneticZR[i], - parameters.gamma, - testSpeedL, - testSpeedR, - testSpeedM, - testSpeedStarL, - testSpeedStarR, - 
testDensityStarL, - testDensityStarR); - // Now check results - testingUtilities::checkResults(fiducialSpeedL[i], - testSpeedL, - parameters.names.at(i) + ", SpeedL"); - testingUtilities::checkResults(fiducialSpeedR.at(i), - testSpeedR, - parameters.names.at(i) + ", SpeedR"); - testingUtilities::checkResults(fiducialSpeedM.at(i), - testSpeedM, - parameters.names.at(i) + ", SpeedM"); - testingUtilities::checkResults(fiducialSpeedStarL.at(i), - testSpeedStarL, - parameters.names.at(i) + ", SpeedStarL"); - testingUtilities::checkResults(fiducialSpeedStarR.at(i), - testSpeedStarR, - parameters.names.at(i) + ", SpeedStarR"); - testingUtilities::checkResults(fiducialDensityStarL.at(i), - testDensityStarL, - parameters.names.at(i) + ", DensityStarL"); - testingUtilities::checkResults(fiducialDensityStarR.at(i), - testDensityStarR, - parameters.names.at(i) + ", DensityStarR"); - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the _hlldInternal::_starFluxes function in the non-degenerate - * case - * - */ - TEST(tMHDHlldInternalStarFluxes, - CorrectInputNonDegenerateExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialVelocityStarY {12.831290892281075, 12.92610185957192}; - std::vector const fiducialVelocityStarZ {48.488664548015286, 9.0850326944201107}; - std::vector const fiducialEnergyStar {1654897.6912410262, 956.83439334487116}; - std::vector const fiducialMagneticStarY {-186.47142421374559, 2.6815421494204679}; - std::vector const fiducialMagneticStarZ {-700.91191100481922, 1.5860591049546646}; - std::vector const fiducialDensityStarFlux {506.82678248238807, 105.14430372486369}; - std::vector const fiducialMomentumStarFluxX{135208.06632708258, 14014.840899433098}; - std::vector const fiducialMomentumStarFluxY{25328.25203616685, 2466.5997745560339}; - std::vector const fiducialMomentumStarFluxZ{95071.711914347878, 1530.7490710422007}; - std::vector const fiducialEnergyStarFlux {116459061.8691024, 3440.9679468544314}; - std::vector const fiducialMagneticStarFluxY{-13929.399086330559, -166.32034689537392}; - std::vector const fiducialMagneticStarFluxZ{-52549.811458376971, -34.380297363339892}; - - double testVelocityStarY; - double testVelocityStarZ; - double testEnergyStar; - double testMagneticStarY; - double testMagneticStarZ; - double testDensityStarFlux; - double testMomentumStarFluxX; - double testMomentumStarFluxY; - double testMomentumStarFluxZ; - double testEnergyStarFlux; - double testMagneticStarFluxY; - double testMagneticStarFluxZ; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_starFluxes(parameters.speedM[i], - parameters.speedSide[i], - parameters.densityL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.energyL[i], - parameters.totalPressureL[i], - 
parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - parameters.densityStarL[i], - parameters.totalPressureStarL[i], - parameters.densityFluxL[i], - parameters.momentumFluxXL[i], - parameters.momentumFluxYL[i], - parameters.momentumFluxZL[i], - parameters.energyFluxL[i], - parameters.magneticFluxYL[i], - parameters.magneticFluxZL[i], - testVelocityStarY, - testVelocityStarZ, - testEnergyStar, - testMagneticStarY, - testMagneticStarZ, - testDensityStarFlux, - testMomentumStarFluxX, - testMomentumStarFluxY, - testMomentumStarFluxZ, - testEnergyStarFlux, - testMagneticStarFluxY, - testMagneticStarFluxZ); - - // Now check results - testingUtilities::checkResults(fiducialVelocityStarY[i], - testVelocityStarY, - parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialVelocityStarZ[i], - testVelocityStarZ, - parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialEnergyStar[i], - testEnergyStar, - parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialMagneticStarY[i], - testMagneticStarY, - parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialMagneticStarZ[i], - testMagneticStarZ, - parameters.names.at(i) + ", MagneticStarZ"); - testingUtilities::checkResults(fiducialDensityStarFlux[i], - testDensityStarFlux, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialMomentumStarFluxX[i], - testMomentumStarFluxX, - parameters.names.at(i) + ", MomentumStarFluxX"); - testingUtilities::checkResults(fiducialMomentumStarFluxY[i], - testMomentumStarFluxY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialMomentumStarFluxZ[i], - testMomentumStarFluxZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyStarFlux[i], - testEnergyStarFlux, - parameters.names.at(i) + ", EnergyStarFlux"); - 
testingUtilities::checkResults(fiducialMagneticStarFluxY[i], - testMagneticStarFluxY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialMagneticStarFluxZ[i], - testMagneticStarFluxZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); - } - } - - /*! - * \brief Test the _hlldInternal::_starFluxes function in the degenerate - * case - * - */ - TEST(tMHDHlldInternalStarFluxes, - CorrectInputDegenerateExpectCorrectOutput) - { - testParams const parameters; - - // Used to get us into the degenerate case - double const totalPressureStarMultiplier = 1E15; - - std::vector const fiducialVelocityStarY {0.33040135813215948, 0.69876195899931859}; - std::vector const fiducialVelocityStarZ {1.500111692877206, 1.8528943583250035}; - std::vector const fiducialEnergyStar {2.7072182962581443e+18, -76277716432851392}; - std::vector const fiducialMagneticStarY {12.297499156516622, 63.744719695704063}; - std::vector const fiducialMagneticStarZ {46.224045698787776, 37.703264551707541}; - std::vector const fiducialDensityStarFlux {506.82678248238807, 105.14430372486369}; - std::vector const fiducialMomentumStarFluxX{135208.06632708258, 14014.840899433098}; - std::vector const fiducialMomentumStarFluxY{236.85804348470396, 19.08858135095122}; - std::vector const fiducialMomentumStarFluxZ{757.76012607552047, 83.112898961023902}; - std::vector const fiducialEnergyStarFlux {1.9052083339008875e+20, -2.7901725119926531e+17}; - std::vector const fiducialMagneticStarFluxY{58.989284454159673, 57.043444592213589}; - std::vector const fiducialMagneticStarFluxZ{29.976925743532302, 97.733298271413588}; - - double testVelocityStarY; - double testVelocityStarZ; - double testEnergyStar; - double testMagneticStarY; - double testMagneticStarZ; - double testDensityStarFlux; - double testMomentumStarFluxX; - double testMomentumStarFluxY; - double testMomentumStarFluxZ; - double testEnergyStarFlux; - double testMagneticStarFluxY; - double testMagneticStarFluxZ; - - 
for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_starFluxes(parameters.speedM[i], - parameters.speedSide[i], - parameters.densityL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - parameters.velocityZL[i], - parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.energyL[i], - parameters.totalPressureL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - parameters.densityStarL[i], - parameters.totalPressureStarL[i] * totalPressureStarMultiplier, - parameters.densityFluxL[i], - parameters.momentumFluxXL[i], - parameters.momentumFluxYL[i], - parameters.momentumFluxZL[i], - parameters.energyFluxL[i], - parameters.magneticFluxYL[i], - parameters.magneticFluxZL[i], - testVelocityStarY, - testVelocityStarZ, - testEnergyStar, - testMagneticStarY, - testMagneticStarZ, - testDensityStarFlux, - testMomentumStarFluxX, - testMomentumStarFluxY, - testMomentumStarFluxZ, - testEnergyStarFlux, - testMagneticStarFluxY, - testMagneticStarFluxZ); - - // Now check results - testingUtilities::checkResults(fiducialVelocityStarY[i], - testVelocityStarY, - parameters.names.at(i) + ", VelocityStarY"); - testingUtilities::checkResults(fiducialVelocityStarZ[i], - testVelocityStarZ, - parameters.names.at(i) + ", VelocityStarZ"); - testingUtilities::checkResults(fiducialEnergyStar[i], - testEnergyStar, - parameters.names.at(i) + ", EnergyStar"); - testingUtilities::checkResults(fiducialMagneticStarY[i], - testMagneticStarY, - parameters.names.at(i) + ", MagneticStarY"); - testingUtilities::checkResults(fiducialMagneticStarZ[i], - testMagneticStarZ, - parameters.names.at(i) + ", MagneticStarZ"); - testingUtilities::checkResults(fiducialDensityStarFlux[i], - testDensityStarFlux, - parameters.names.at(i) + ", DensityStarFlux"); - testingUtilities::checkResults(fiducialMomentumStarFluxX[i], - testMomentumStarFluxX, - parameters.names.at(i) + ", MomentumStarFluxX"); - 
testingUtilities::checkResults(fiducialMomentumStarFluxY[i], - testMomentumStarFluxY, - parameters.names.at(i) + ", MomentumStarFluxY"); - testingUtilities::checkResults(fiducialMomentumStarFluxZ[i], - testMomentumStarFluxZ, - parameters.names.at(i) + ", MomentumStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyStarFlux[i], - testEnergyStarFlux, - parameters.names.at(i) + ", EnergyStarFlux"); - testingUtilities::checkResults(fiducialMagneticStarFluxY[i], - testMagneticStarFluxY, - parameters.names.at(i) + ", MagneticStarFluxY"); - testingUtilities::checkResults(fiducialMagneticStarFluxZ[i], - testMagneticStarFluxZ, - parameters.names.at(i) + ", MagneticStarFluxZ"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the _hlldInternal::_nonStarFluxes function - * - */ - TEST(tMHDHlldInternalNonStarFluxes, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialDensityFlux {38.504606872151484, 18.984145880030045}; - std::vector const fiducialMomentumFluxX{-3088.4810263278778, 2250.9966820900618}; - std::vector const fiducialMomentumFluxY{-1127.8835013070616, -2000.3517480656785}; - std::vector const fiducialMomentumFluxZ{-4229.5657456907293, -1155.8240512956793}; - std::vector const fiducialMagneticFluxY{-8.6244637840856555, 2.9729840344910059}; - std::vector const fiducialMagneticFluxZ{-56.365490339906408, -43.716615275067923}; - std::vector const fiducialEnergyFlux {-12344.460641662206, -2717.2127176227905}; - - double testDensityFlux; - double testMomentumFluxX; - double testMomentumFluxY; - double testMomentumFluxZ; - double testMagneticFluxY; - double testMagneticFluxZ; - double testEnergyFlux; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_nonStarFluxes(parameters.momentumXL[i], - parameters.velocityXL[i], - parameters.velocityYL[i], - 
parameters.velocityZL[i], - parameters.totalPressureL[i], - parameters.energyL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i], - testDensityFlux, - testMomentumFluxX, - testMomentumFluxY, - testMomentumFluxZ, - testMagneticFluxY, - testMagneticFluxZ, - testEnergyFlux); - - // Now check results - testingUtilities::checkResults(fiducialDensityFlux[i], - testDensityFlux, - parameters.names.at(i) + ", DensityFlux"); - testingUtilities::checkResults(fiducialMomentumFluxX[i], - testMomentumFluxX, - parameters.names.at(i) + ", MomentumFluxX"); - testingUtilities::checkResults(fiducialMomentumFluxY[i], - testMomentumFluxY, - parameters.names.at(i) + ", MomentumFluxY"); - testingUtilities::checkResults(fiducialMomentumFluxZ[i], - testMomentumFluxZ, - parameters.names.at(i) + ", MomentumFluxZ"); - testingUtilities::checkResults(fiducialMagneticFluxY[i], - testMagneticFluxY, - parameters.names.at(i) + ", MagneticFluxY"); - testingUtilities::checkResults(fiducialMagneticFluxZ[i], - testMagneticFluxZ, - parameters.names.at(i) + ", MagneticFluxZ"); - testingUtilities::checkResults(fiducialEnergyFlux[i], - testEnergyFlux, - parameters.names.at(i) + ", EnergyFlux"); - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the _hlldInternal::_dotProduct function - * - */ - TEST(tMHDHlldInternalDotProduct, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialDotProduct{5149.7597411033557,6127.2319832451567}; - - double testDotProduct; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - testDotProduct = _hlldInternal::_dotProduct(parameters.momentumXL[i], - parameters.momentumYL[i], - parameters.momentumZL[i], - parameters.magneticXL[i], - parameters.magneticYL[i], - parameters.magneticZL[i]); - - // Now check results - testingUtilities::checkResults(fiducialDotProduct[i], - testDotProduct, - parameters.names.at(i) + ", DotProduct"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the _hlldInternal::_doubleStarState function. Non-degenerate - * state - * - */ - TEST(tMHDHlldInternalDoubleStarState, - CorrectInputNonDegenerateExpectCorrectOutput) - { - testParams const parameters; - - double const fixedEpsilon = 7E-12; - - std::vector const fiducialVelocityDoubleStarY{-1.5775383335759607, 3.803188977150934}; - std::vector const fiducialVelocityDoubleStarZ{-3.4914062207842482, -4.2662645349592765}; - std::vector const fiducialMagneticDoubleStarY{45.259313435283325, 71.787329583230417}; - std::vector const fiducialMagneticDoubleStarZ{36.670978215630669, 53.189673238238178}; - std::vector const fiducialEnergyDoubleStarL {-2048.1953674500514, -999.79694164635089}; - std::vector const fiducialEnergyDoubleStarR {1721.0582276783764, 252.04716752257781}; - - double testVelocityDoubleStarY; - double testVelocityDoubleStarZ; - double testMagneticDoubleStarY; - double testMagneticDoubleStarZ; - double testEnergyDoubleStarL; - double testEnergyDoubleStarR; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_doubleStarState(parameters.speedM[i], - 
parameters.magneticXL[i], - parameters.totalPressureStarL[i], - parameters.densityStarL[i], - parameters.velocityStarYL[i], - parameters.velocityStarZL[i], - parameters.energyStarL[i], - parameters.magneticStarYL[i], - parameters.magneticStarZL[i], - parameters.densityStarR[i], - parameters.velocityStarYR[i], - parameters.velocityStarZR[i], - parameters.energyStarR[i], - parameters.magneticStarYR[i], - parameters.magneticStarZR[i], - testVelocityDoubleStarY, - testVelocityDoubleStarZ, - testMagneticDoubleStarY, - testMagneticDoubleStarZ, - testEnergyDoubleStarL, - testEnergyDoubleStarR); - - // Now check results - testingUtilities::checkResults(fiducialVelocityDoubleStarY[i], - testVelocityDoubleStarY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialVelocityDoubleStarZ[i], - testVelocityDoubleStarZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialMagneticDoubleStarY[i], - testMagneticDoubleStarY, - parameters.names.at(i) + ", MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialMagneticDoubleStarZ[i], - testMagneticDoubleStarZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialEnergyDoubleStarL[i], - testEnergyDoubleStarL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialEnergyDoubleStarR[i], - testEnergyDoubleStarR, - parameters.names.at(i) + ", EnergyDoubleStarR", - fixedEpsilon); - } - } - - /*! - * \brief Test the _hlldInternal::_doubleStarState function in the - * degenerate state. 
- * - */ - TEST(tMHDHlldInternalDoubleStarState, - CorrectInputDegenerateExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialVelocityDoubleStarY{1.5746306813243216, 1.4363926014039052}; - std::vector const fiducialVelocityDoubleStarZ{1.3948193325212686, 1.1515754515491903}; - std::vector const fiducialMagneticDoubleStarY{62.093488291430653, 54.279167444036723}; - std::vector const fiducialMagneticDoubleStarZ{62.765890944643196, 93.267654555096414}; - std::vector const fiducialEnergyDoubleStarL {6.579867455284738, 30.450436649083692}; - std::vector const fiducialEnergyDoubleStarR {90.44484278669114, 61.33664731346812}; - - double testVelocityDoubleStarY; - double testVelocityDoubleStarZ; - double testMagneticDoubleStarY; - double testMagneticDoubleStarZ; - double testEnergyDoubleStarL; - double testEnergyDoubleStarR; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_doubleStarState(parameters.speedM[i], - 0.0, - parameters.totalPressureStarL[i], - parameters.densityStarL[i], - parameters.velocityStarYL[i], - parameters.velocityStarZL[i], - parameters.energyStarL[i], - parameters.magneticStarYL[i], - parameters.magneticStarZL[i], - parameters.densityStarR[i], - parameters.velocityStarYR[i], - parameters.velocityStarZR[i], - parameters.energyStarR[i], - parameters.magneticStarYR[i], - parameters.magneticStarZR[i], - testVelocityDoubleStarY, - testVelocityDoubleStarZ, - testMagneticDoubleStarY, - testMagneticDoubleStarZ, - testEnergyDoubleStarL, - testEnergyDoubleStarR); - // Now check results - testingUtilities::checkResults(fiducialVelocityDoubleStarY[i], - testVelocityDoubleStarY, - parameters.names.at(i) + ", VelocityDoubleStarY"); - testingUtilities::checkResults(fiducialVelocityDoubleStarZ[i], - testVelocityDoubleStarZ, - parameters.names.at(i) + ", VelocityDoubleStarZ"); - testingUtilities::checkResults(fiducialMagneticDoubleStarY[i], - testMagneticDoubleStarY, - parameters.names.at(i) + ", 
MagneticDoubleStarY"); - testingUtilities::checkResults(fiducialMagneticDoubleStarZ[i], - testMagneticDoubleStarZ, - parameters.names.at(i) + ", MagneticDoubleStarZ"); - testingUtilities::checkResults(fiducialEnergyDoubleStarL[i], - testEnergyDoubleStarL, - parameters.names.at(i) + ", EnergyDoubleStarL"); - testingUtilities::checkResults(fiducialEnergyDoubleStarR[i], - testEnergyDoubleStarR, - parameters.names.at(i) + ", EnergyDoubleStarR"); - } - } - // ========================================================================= - - // ========================================================================= - /*! - * \brief Test the _hlldInternal::_doubleStarFluxes function - * - */ - TEST(tMHDHlldInternalDoubleStarFluxes, - CorrectInputExpectCorrectOutput) - { - testParams const parameters; - - std::vector const fiducialMomentumDoubleStarFluxX{1937.3388606704509, -21.762854649386174}; - std::vector const fiducialMomentumDoubleStarFluxY{-1555.8040962754276, 39.237503643804175}; - std::vector const fiducialMomentumDoubleStarFluxZ{-801.91650203165148, -64.746529703562871}; - std::vector const fiducialEnergyDoubleStarFlux {2781.4706748628528, 136.89786983482355}; - std::vector const fiducialMagneticDoubleStarFluxY{-2799.7143456312342, 141.2263259922299}; - std::vector const fiducialMagneticDoubleStarFluxZ{1536.9628864256708, -31.569502877970095}; - - - double testMomentumDoubleStarFluxX; - double testMomentumDoubleStarFluxY; - double testMomentumDoubleStarFluxZ; - double testEnergyDoubleStarFlux; - double testMagneticDoubleStarFluxY; - double testMagneticDoubleStarFluxZ; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - _hlldInternal::_doubleStarFluxes(parameters.speedSide[i], - parameters.momentumStarFluxX[i], - parameters.momentumStarFluxY[i], - parameters.momentumStarFluxZ[i], - parameters.energyStarFlux[i], - parameters.magneticStarFluxY[i], - parameters.magneticStarFluxZ[i], - parameters.densityStarL[i], - parameters.velocityStarXL[i], - 
parameters.velocityStarYL[i], - parameters.velocityStarZL[i], - parameters.energyStarL[i], - parameters.magneticStarYL[i], - parameters.magneticStarZL[i], - parameters.velocityDoubleStarXL[i], - parameters.velocityDoubleStarYL[i], - parameters.velocityDoubleStarZL[i], - parameters.energyDoubleStar[i], - parameters.magneticDoubleStarY[i], - parameters.magneticDoubleStarZ[i], - testMomentumDoubleStarFluxX, - testMomentumDoubleStarFluxY, - testMomentumDoubleStarFluxZ, - testEnergyDoubleStarFlux, - testMagneticDoubleStarFluxY, - testMagneticDoubleStarFluxZ); - - // Now check results - testingUtilities::checkResults(fiducialMomentumDoubleStarFluxX[i], - testMomentumDoubleStarFluxX, - parameters.names.at(i) + ", MomentumDoubleStarFluxX"); - testingUtilities::checkResults(fiducialMomentumDoubleStarFluxY[i], - testMomentumDoubleStarFluxY, - parameters.names.at(i) + ", MomentumDoubleStarFluxY"); - testingUtilities::checkResults(fiducialMomentumDoubleStarFluxZ[i], - testMomentumDoubleStarFluxZ, - parameters.names.at(i) + ", MomentumDoubleStarFluxZ"); - testingUtilities::checkResults(fiducialEnergyDoubleStarFlux[i], - testEnergyDoubleStarFlux, - parameters.names.at(i) + ", EnergyDoubleStarFlux"); - testingUtilities::checkResults(fiducialMagneticDoubleStarFluxY[i], - testMagneticDoubleStarFluxY, - parameters.names.at(i) + ", MagneticDoubleStarFluxY"); - testingUtilities::checkResults(fiducialMagneticDoubleStarFluxZ[i], - testMagneticDoubleStarFluxZ, - parameters.names.at(i) + ", MagneticDoubleStarFluxZ"); - } - } - // ========================================================================= - - // ========================================================================= - /*! 
- * \brief Test the _hlldInternal::_returnFluxes function - * - */ - TEST(tMHDHlldInternalReturnFluxes, - CorrectInputExpectCorrectOutput) - { - double const dummyValue = 999; - double const densityFlux = 1; - double const momentumFluxX = 2; - double const momentumFluxY = 3; - double const momentumFluxZ = 4; - double const energyFlux = 5; - double const magneticFluxY = 6; - double const magneticFluxZ = 7; - - int threadId = 0; - int n_cells = 10; - int nFields = 8; // Total number of conserved fields - #ifdef SCALAR - nFields += NSCALARS; - #endif // SCALAR - #ifdef DE - nFields++; - #endif //DE - - // Lambda for finding indices and check if they're correct - auto findIndex = [](std::vector const &vec, - double const &num, - int const &fidIndex, - std::string const &name) - { - int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); - // EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl; - - return index; - }; - - for (size_t direction = 0; direction < 3; direction++) - { - int o1, o2, o3; - if (direction==0) {o1 = 1; o2 = 2; o3 = 3;} - if (direction==1) {o1 = 2; o2 = 3; o3 = 1;} - if (direction==2) {o1 = 3; o2 = 1; o3 = 2;} - - std::vector testFluxArray(nFields*n_cells, dummyValue); - - // Fiducial Indices - int const fiducialDensityIndex = threadId; - int const fiducialMomentumIndexX = threadId + n_cells * o1; - int const fiducialMomentumIndexY = threadId + n_cells * o2; - int const fiducialMomentumIndexZ = threadId + n_cells * o3; - int const fiducialEnergyIndex = threadId + n_cells * 4; - int const fiducialMagneticYIndex = threadId + n_cells * (o2 + 4 + NSCALARS); - int const fiducialMagneticZIndex = threadId + n_cells * (o3 + 4 + NSCALARS); - - _hlldInternal::_returnFluxes(threadId, - o1, - o2, - o3, - n_cells, - testFluxArray.data(), - densityFlux, - momentumFluxX, - momentumFluxY, - momentumFluxZ, - energyFlux, - magneticFluxY, - magneticFluxZ); - - // Find the indices for the various fields - int densityLoc = 
findIndex(testFluxArray, densityFlux, fiducialDensityIndex, "density"); - int momentumXLocX = findIndex(testFluxArray, momentumFluxX, fiducialMomentumIndexX, "momentum X"); - int momentumYLocY = findIndex(testFluxArray, momentumFluxY, fiducialMomentumIndexY, "momentum Y"); - int momentumZLocZ = findIndex(testFluxArray, momentumFluxZ, fiducialMomentumIndexZ, "momentum Z"); - int energyLoc = findIndex(testFluxArray, energyFlux, fiducialEnergyIndex, "energy"); - int magneticYLoc = findIndex(testFluxArray, magneticFluxY, fiducialMagneticYIndex, "magnetic Y"); - int magneticZLoc = findIndex(testFluxArray, magneticFluxZ, fiducialMagneticZIndex, "magnetic Z"); - - for (size_t i = 0; i < testFluxArray.size(); i++) - { - // Skip the already checked indices - if ((i != densityLoc) and - (i != momentumXLocX) and - (i != momentumYLocY) and - (i != momentumZLocZ) and - (i != energyLoc) and - (i != magneticYLoc) and - (i != magneticZLoc)) - { - EXPECT_EQ(dummyValue, testFluxArray.at(i)) - << "Unexpected value at index that _returnFluxes shouldn't be touching" << std::endl - << "Index = " << i << std::endl - << "Direction = " << direction << std::endl; - } - } - } - } - // ========================================================================= -#endif // CUDA & HLLD \ No newline at end of file +#include "../utils/testing_utilities.h" + +#ifdef MHD +// ========================================================================= +// Integration tests for the entire HLLD solver. Unit tests are below +// ========================================================================= + +// ========================================================================= +/*! +* \brief Test fixture for simple testing of the HLLD Riemann Solver. 
+Effectively takes the left state, right state, fiducial fluxes, and +custom user output then performs all the required running and testing +* +*/ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDCalculateHLLDFluxesCUDA : public ::testing::Test +{ + protected: + // ===================================================================== + /*! + * \brief Compute and return the HLLD fluxes + * + * \param[in] stateLeft The state on the left side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] stateRight The state on the right side in conserved + * variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] gamma The adiabatic index + * \param[in] direction Which plane the interface is. 0 = plane normal to + * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0.
+ * \return std::vector + */ + std::vector Compute_Fluxes(std::vector stateLeft, std::vector stateRight, Real const &gamma, + int const &direction = 0) + { + // Rearrange X, Y, and Z values for the chosen direction + std::rotate(stateLeft.begin() + 1, stateLeft.begin() + 4 - direction, stateLeft.begin() + 4); + std::rotate(stateRight.begin() + 1, stateRight.begin() + 4 - direction, stateRight.begin() + 4); + + // Split the X magnetic field out of both states into its own vector; it is + // copied to the device and passed to the kernel as a separate argument + EXPECT_DOUBLE_EQ(stateLeft.at(grid_enum::magnetic_x), stateRight.at(grid_enum::magnetic_x)) + << "The left and right magnetic fields are not equal"; + std::vector const magneticX{stateLeft.at(grid_enum::magnetic_x)}; + stateLeft.erase(stateLeft.begin() + grid_enum::magnetic_x); + stateRight.erase(stateRight.begin() + grid_enum::magnetic_x); + + // Simulation Parameters + int const nx = 1; // Number of cells in the x-direction + int const ny = 1; // Number of cells in the y-direction + int const nz = 1; // Number of cells in the z-direction + int const n_cells = nx * ny * nz; + int nFields = 8; // Total number of conserved fields + #ifdef SCALAR + nFields += NSCALARS; + #endif // SCALAR + #ifdef DE + nFields++; + #endif // DE + + // Launch Parameters + dim3 const dimGrid(1, 1, 1); // How many blocks in the grid + dim3 const dimBlock(1, 1, 1); // How many threads per block + + // Create the std::vector to store the fluxes (nFields - 1 entries) and declare the device + // pointers + std::vector testFlux(nFields - 1, 0); + Real *devConservedLeft; + Real *devConservedRight; + Real *devConservedMagXFace; + Real *devTestFlux; + + // Allocate device arrays and copy data + GPU_Error_Check(cudaMalloc(&devConservedLeft, stateLeft.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedRight, stateRight.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devConservedMagXFace, magneticX.size() * sizeof(Real))); + GPU_Error_Check(cudaMalloc(&devTestFlux, testFlux.size() * sizeof(Real))); + +
GPU_Error_Check( + cudaMemcpy(devConservedLeft, stateLeft.data(), stateLeft.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check( + cudaMemcpy(devConservedRight, stateRight.data(), stateRight.size() * sizeof(Real), cudaMemcpyHostToDevice)); + GPU_Error_Check( + cudaMemcpy(devConservedMagXFace, magneticX.data(), magneticX.size() * sizeof(Real), cudaMemcpyHostToDevice)); + + // Run kernel + hipLaunchKernelGGL(mhd::Calculate_HLLD_Fluxes_CUDA, dimGrid, dimBlock, 0, 0, + devConservedLeft, // the "left" interface + devConservedRight, // the "right" interface + devConservedMagXFace, // the magnetic field at the interface + devTestFlux, n_cells, gamma, direction, nFields); + + GPU_Error_Check(); + GPU_Error_Check(cudaMemcpy(testFlux.data(), devTestFlux, testFlux.size() * sizeof(Real), cudaMemcpyDeviceToHost)); + + // Make sure to sync with the device so we have the results + cudaDeviceSynchronize(); + GPU_Error_Check(); + + // Free device arrays + cudaFree(devConservedLeft); + cudaFree(devConservedRight); + cudaFree(devConservedMagXFace); + cudaFree(devTestFlux); + + // The HLLD solver only writes the first two "slots" for + // magnetic flux so insert a zero at magnetic_x to make sure we have all the + // magnetic fluxes in the right spots + testFlux.insert(testFlux.begin() + grid_enum::magnetic_x, 0.0); + std::rotate(testFlux.begin() + 1, testFlux.begin() + 1 + direction, + testFlux.begin() + 4); // Rotate momentum back to the caller's X, Y, Z order + + return testFlux; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Check if the fluxes are correct + * + * \param[in] fiducialFlux The fiducial flux in conserved variables. In + * order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, x-magnetic field, + * y-magnetic field, z-magnetic field. Scalar and thermal energy fluxes are passed separately.
+ * \param[in] scalarFlux The fiducial flux in the passive scalars + * \param[in] thermalEnergyFlux The fiducial flux in the dual energy + * thermal energy + * \param[in] testFlux The test flux in conserved variables. In order the + * elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field. + * \param[in] customOutput Any custom output the user would like to + * print. It will print after the default GTest output but before the + * values that failed are printed + * \param[in] direction Which plane the interface is. 0 = plane normal to + * X, 1 = plane normal to Y, 2 = plane normal to Z. Defaults to 0. + */ + void Check_Results(std::vector fiducialFlux, std::vector const &scalarFlux, Real thermalEnergyFlux, + std::vector const &testFlux, std::string const &customOutput = "", int const &direction = 0) + { + // Field names, built with the same insertions as fiducialFlux so index i names element i + std::vector fieldNames{"Densities", "X Momentum", "Y Momentum", "Z Momentum", + "Energies", "X Magnetic Field", "Y Magnetic Field", "Z Magnetic Field"}; + #ifdef DE + fieldNames.push_back("Thermal energy (dual energy)"); + fiducialFlux.push_back(thermalEnergyFlux); + #endif // DE + #ifdef SCALAR + std::vector scalarNames{"Scalar 1", "Scalar 2", "Scalar 3"}; + fieldNames.insert(fieldNames.begin() + grid_enum::magnetic_start, scalarNames.begin(), + scalarNames.begin() + grid_enum::nscalars); + + fiducialFlux.insert(fiducialFlux.begin() + grid_enum::magnetic_start, scalarFlux.begin(), + scalarFlux.begin() + grid_enum::nscalars); + #endif // SCALAR + + ASSERT_TRUE((fiducialFlux.size() == testFlux.size()) and (fiducialFlux.size() == fieldNames.size())) + << "The fiducial flux, test flux, and field name vectors are not all " + "the same length" + << std::endl + << "fiducialFlux.size() = " << fiducialFlux.size() << std::endl + << "testFlux.size() = " << testFlux.size() << std::endl + << "fieldNames.size() = " << fieldNames.size() << std::endl; + + // Check for
equality + for (size_t i = 0; i < fieldNames.size(); i++) { + // Check for equality and if not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + + // This error is consistent with the FP error in rearranging the flux + // computations in the Athena solver + double const fixedEpsilon = 2.7E-15; + int64_t const ulpsEpsilon = 7; + + bool areEqual = testing_utilities::nearlyEqualDbl(fiducialFlux[i], testFlux[i], absoluteDiff, ulpsDiff, + fixedEpsilon, ulpsEpsilon); + EXPECT_TRUE(areEqual) << std::endl + << customOutput << std::endl + << "There's a difference in " << fieldNames[i] << " Flux" << std::endl + << "The direction is: " << direction << " (0=X, 1=Y, 2=Z)" << std::endl + << "The fiducial value is: " << fiducialFlux[i] << std::endl + << "The test value is: " << testFlux[i] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + } + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief Convert a vector of quantities in primitive variables to + * conserved variables + * + * \param[in] input The state in primitive variables. In order the + * elements are: density, x-velocity, y-velocity, z-velocity, pressure, + * x-magnetic field, y-magnetic field, z-magnetic field. + * \param[in] gamma The adiabatic index + * \param[in] primitiveScalars The passive scalars in primitive form (only used when SCALAR is defined) + * \return std::vector The state in conserved variables. In order the elements are: density, x-momentum, + * y-momentum, z-momentum, energy, passive scalars, x-magnetic field, + * y-magnetic field, z-magnetic field (plus the thermal energy last when DE is defined).
+ */ + std::vector Primitive_2_Conserved(std::vector const &input, double const &gamma, + std::vector const &primitiveScalars) + { + std::vector output(input.size()); + output.at(0) = input.at(0); // Density + output.at(1) = input.at(1) * input.at(0); // X Velocity to momentum + output.at(2) = input.at(2) * input.at(0); // Y Velocity to momentum + output.at(3) = input.at(3) * input.at(0); // Z Velocity to momentum + output.at(4) = + hydro_utilities::Calc_Energy_Primitive(input.at(4), input.at(0), input.at(1), input.at(2), input.at(3), gamma, + input.at(5), input.at(6), input.at(7)); // Pressure to Energy + output.at(5) = input.at(5); // X Magnetic Field + output.at(6) = input.at(6); // Y Magnetic Field + output.at(7) = input.at(7); // Z Magnetic Field; scalars (if any) are spliced in at grid_enum::magnetic_start below + + #ifdef SCALAR + std::vector conservedScalar(primitiveScalars.size()); + std::transform(primitiveScalars.begin(), primitiveScalars.end(), conservedScalar.begin(), + [&](Real const &c) { return c * output.at(0); }); + output.insert(output.begin() + grid_enum::magnetic_start, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + #endif // SCALAR -- NOTE(review): the insert above assumes primitiveScalars.size() >= grid_enum::nscalars; confirm + #ifdef DE + output.push_back(mhd::utils::computeThermalEnergy( + output.at(4), output.at(0), output.at(1), output.at(2), output.at(3), output.at(grid_enum::magnetic_x), + output.at(grid_enum::magnetic_y), output.at(grid_enum::magnetic_z), gamma)); + #endif // DE + return output; + } + // ===================================================================== + + // ===================================================================== + /*! + * \brief On test start make sure that NSCALARS is in the supported range of 1 to 3 (only checked when SCALAR is defined) + * + */ + void SetUp() + { + #ifdef SCALAR + ASSERT_LE(NSCALARS, 3) << "Only up to 3 passive scalars are currently " + "supported in HLLD tests. NSCALARS = " + << NSCALARS; + ASSERT_GE(NSCALARS, 1) << "There must be at least 1 passive scalar to test " + "with passive scalars.
NSCALARS = " + << NSCALARS; + #endif // SCALAR + } + // ===================================================================== + private: +}; +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Brio & Wu Shock tube + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, BrioAndWuShockTubeCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 2.; + Real const Vz = 0.0; + Real const Bx = 0.75; + Real const Bz = 0.0; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive + // Scalars | + leftICs = Primitive_2_Conserved({1.0, 0.0, 0.0, Vz, 1.0, Bx, 1.0, Bz}, gamma, primitiveScalar), + leftFastRareLeftSide = Primitive_2_Conserved({0.978576, 0.038603, -0.011074, Vz, 0.957621, Bx, 0.970288, Bz}, + gamma, primitiveScalar), + leftFastRareRightSide = Primitive_2_Conserved({0.671655, 0.647082, -0.238291, Vz, 0.451115, Bx, 0.578240, Bz}, + gamma, primitiveScalar), + compoundLeftSide = Primitive_2_Conserved({0.814306, 0.506792, -0.911794, Vz, 0.706578, Bx, -0.108819, Bz}, gamma, + primitiveScalar), + compoundPeak = Primitive_2_Conserved({0.765841, 0.523701, -1.383720, Vz, 0.624742, Bx, -0.400787, Bz}, gamma, + primitiveScalar), + compoundRightSide = Primitive_2_Conserved({0.695211, 0.601089, -1.583720, Vz, 0.515237, Bx, -0.537027, Bz}, gamma, + primitiveScalar), + contactLeftSide = Primitive_2_Conserved({0.680453, 0.598922, -1.584490, Vz, 0.515856, Bx, -0.533616, Bz}, gamma, + primitiveScalar), + contactRightSide = Primitive_2_Conserved({0.231160, 0.599261, -1.584820, Vz, 0.516212, Bx, -0.533327, Bz}, gamma, + primitiveScalar), + slowShockLeftSide = 
Primitive_2_Conserved({0.153125, 0.086170, -0.683303, Vz, 0.191168, Bx, -0.850815, Bz}, gamma, + primitiveScalar), + slowShockRightSide = Primitive_2_Conserved({0.117046, -0.238196, -0.165561, Vz, 0.087684, Bx, -0.903407, Bz}, + gamma, primitiveScalar), + rightFastRareLeftSide = Primitive_2_Conserved({0.117358, -0.228756, -0.158845, Vz, 0.088148, Bx, -0.908335, Bz}, + gamma, primitiveScalar), + rightFastRareRightSide = Primitive_2_Conserved({0.124894, -0.003132, -0.002074, Vz, 0.099830, Bx, -0.999018, Bz}, + gamma, primitiveScalar), + rightICs = Primitive_2_Conserved({0.128, 0.0, 0.0, Vz, 0.1, Bx, -1.0, Bz}, gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Brio & Wu state\n" + "Right State: Left Brio & Wu state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 1.21875, -0.75, 0, 0, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Brio & Wu state\n" + "Right State: Right Brio & Wu state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 0.31874999999999998, 0.75, 0, 0, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Brio & Wu state\n" + 
"Right State: Right Brio & Wu state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.20673357746080057, 0.4661897584603672, + 0.061170028480309613, 0, + 0.064707291981509041, 0.0, + 1.0074980455427278, 0}; + std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; + Real thermalEnergyFlux = 0.20673357746080046; + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Brio & Wu state with negative Bx\n" + "Right State: Right Brio & Wu state with negative Bx\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.20673357746080057, 0.4661897584603672, + -0.061170028480309613, 0, + 0.064707291981509041, 0.0, + 1.0074980455427278, 0}; + std::vector const scalarFlux{0.22885355953447648, 0.46073027567244362, 0.6854281091039145}; + Real thermalEnergyFlux = 0.20673357746080046; + + std::vector leftICsNegBx = leftICs, rightICsNegBx = rightICs; + leftICsNegBx[5] = -leftICsNegBx[5]; + rightICsNegBx[5] = -rightICsNegBx[5]; + + std::vector const testFluxes = Compute_Fluxes(leftICsNegBx, rightICsNegBx, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Brio & Wu state\n" + "Right State: Left Brio & Wu state\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.20673357746080057, 0.4661897584603672, + 0.061170028480309613, 0, + -0.064707291981509041, 0.0, + 
-1.0074980455427278, 0}; + std::vector const scalarFlux{-0.22885355953447648, -0.46073027567244362, -0.6854281091039145}; + Real thermalEnergyFlux = -0.20673357746080046; + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + + // Cross wave checks + { + std::string const outputString{ + "Left State: Left of left fast rarefaction\n" + "Right State: Right of left fast rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.4253304970883941, 0.47729308161522394, + -0.55321646324583107, 0, + 0.92496835095531071, 0.0, + 0.53128887284876058, 0}; + std::vector const scalarFlux{0.47083980954039228, 0.94789941519098619, 1.4101892974729979}; + Real thermalEnergyFlux = 0.41622256825457099; + std::vector const testFluxes = + Compute_Fluxes(leftFastRareLeftSide, leftFastRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left fast rarefaction\n" + "Right State: Left of left fast rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.070492123816403796, 1.2489600267034342, + -0.71031457071286608, 0, + 0.21008080091470105, 0.0, + 0.058615131833681167, 0}; + std::vector const scalarFlux{0.078034606921016325, 0.15710005136841393, 0.23371763662029341}; + Real thermalEnergyFlux = 0.047345816580591255; + std::vector const testFluxes = + Compute_Fluxes(leftFastRareRightSide, leftFastRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const 
outputString{ + "Left State: Left of compound wave\n" + "Right State: Right of compound wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.4470171023231666, 0.60747660800918468, + -0.20506357956052623, 0, + 0.72655525704800772, 0.0, + 0.76278089951123285, 0}; + std::vector const scalarFlux{0.4948468279606959, 0.99623058485843297, 1.482091544807598}; + Real thermalEnergyFlux = 0.38787931087981475; + std::vector const testFluxes = Compute_Fluxes(compoundLeftSide, compoundRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of compound wave\n" + "Right State: Left of compound wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.38496850292724116, 0.66092864409611585, + -0.3473204105316457, 0, + 0.89888639514227009, 0.0, + 0.71658566275120927, 0}; + std::vector const scalarFlux{0.42615918171426637, 0.85794792823389721, 1.2763685331959034}; + Real thermalEnergyFlux = 0.28530908823756074; + std::vector const testFluxes = Compute_Fluxes(compoundRightSide, compoundLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of Compound Wave\n" + "Right State: Peak of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.41864266180405574, 0.63505764056357727, + -0.1991008813536404, 0, + 0.73707474818824525, 0.0, + 0.74058225030218761, 0}; + std::vector const scalarFlux{0.46343639240225803, 0.93299478173931882, 
1.388015684704111}; + Real thermalEnergyFlux = 0.36325864563467081; + std::vector const testFluxes = Compute_Fluxes(compoundLeftSide, compoundPeak, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Peak of Compound Wave\n" + "Right State: Left of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.39520761138156862, 0.6390998385557225, + -0.35132701297727598, 0, + 0.89945171879176522, 0.0, + 0.71026545717401468, 0}; + std::vector const scalarFlux{0.43749384947851333, 0.88076699477714815, 1.3103164425435772}; + Real thermalEnergyFlux = 0.32239432669410983; + std::vector const testFluxes = Compute_Fluxes(compoundPeak, compoundLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Peak of Compound Wave\n" + "Right State: Right of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.4285899590904928, 0.6079309920345296, + -0.26055320217638239, 0, + 0.75090757444649436, 0.0, + 0.85591904930227747, 0}; + std::vector const scalarFlux{0.47444802592454061, 0.95516351251477749, 1.4209960899845735}; + Real thermalEnergyFlux = 0.34962629086469987; + std::vector const testFluxes = Compute_Fluxes(compoundPeak, compoundRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of Compound Wave\n" + "Right State: Peak of Compound Wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + 
// Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.39102247793946454, 0.65467021266207581, + -0.25227691377588229, 0, + 0.76271525822813691, 0.0, + 0.83594460438033491, 0}; + std::vector const scalarFlux{0.43286091709705776, 0.8714399289555731, 1.2964405732397004}; + Real thermalEnergyFlux = 0.28979582956267347; + std::vector const testFluxes = Compute_Fluxes(compoundRightSide, compoundPeak, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of contact discontinuity\n" + "Right State: Right of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.40753761783585118, 0.62106392255463172, + -0.2455554035355339, 0, + 0.73906344777217226, 0.0, + 0.8687394222350926, 0}; + std::vector const scalarFlux{0.45114313616335622, 0.90824587528847567, 1.3511967538747176}; + Real thermalEnergyFlux = 0.30895701155896288; + std::vector const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of contact discontinuity\n" + "Right State: Left of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.13849588572126192, 0.46025037934770729, + 0.18052412687974539, 0, + 0.35385590617992224, 0.0, + 0.86909622543144227, 0}; + std::vector const scalarFlux{0.15331460335320088, 0.30865449334158279, 0.45918507401922254}; + Real thermalEnergyFlux = 0.30928031735570188; + std::vector const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction); + 
Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Slow shock left side\n" + "Right State: Slow shock right side\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{3.5274134848883865e-05, 0.32304849716274459, + 0.60579784881286636, 0, + -0.32813070621836449, 0.0, + 0.40636483121437972, 0}; + std::vector const scalarFlux{3.9048380136491711e-05, 7.8612589559210735e-05, 0.00011695189454326261}; + Real thermalEnergyFlux = 4.4037784886918126e-05; + std::vector const testFluxes = Compute_Fluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Slow shock right side\n" + "Right State: Slow shock left side\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.016514307834939734, 0.16452009375678914, + 0.71622171077118635, 0, + -0.37262428139914472, 0.0, + 0.37204015363322052, 0}; + std::vector const scalarFlux{-0.018281297976332211, -0.036804091985367396, -0.054753421923485097}; + Real thermalEnergyFlux = -0.020617189878790236; + std::vector const testFluxes = Compute_Fluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right fast rarefaction left side\n" + "Right State: Right fast rarefaction right side\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.026222824218991747, 
0.22254903570732654, + 0.68544334213642255, 0, + -0.33339172106895454, 0.0, + 0.32319665359522443, 0}; + std::vector const scalarFlux{-0.029028601629558917, -0.058440671223894146, -0.086942145734385745}; + Real thermalEnergyFlux = -0.020960370728633469; + std::vector const testFluxes = + Compute_Fluxes(rightFastRareLeftSide, rightFastRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right fast rarefaction right side\n" + "Right State: Right fast rarefaction left side\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.001088867226159973, 0.32035322820305906, + 0.74922357263343131, 0, + -0.0099746892805345766, 0.0, + 0.0082135595470345102, 0}; + std::vector const scalarFlux{-0.0012053733294214947, -0.0024266696462237609, -0.0036101547366371614}; + Real thermalEnergyFlux = -0.00081785194236053073; + std::vector const testFluxes = + Compute_Fluxes(rightFastRareRightSide, rightFastRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Dai & Woodward Shock tube + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + Real const coef = 1. / (std::sqrt(4. * M_PI)); + Real const Bx = 4. 
* coef; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = Primitive_2_Conserved({1.08, 0.0, 0.0, 0.0, 1.0, Bx, 3.6 * coef, 2 * coef}, gamma, primitiveScalar), + leftFastShockLeftSide = Primitive_2_Conserved( + {1.09406, 1.176560, 0.021003, 0.506113, 0.970815, 1.12838, 1.105355, 0.614087}, gamma, primitiveScalar), + leftFastShockRightSide = Primitive_2_Conserved( + {1.40577, 0.693255, 0.210562, 0.611423, 1.494290, 1.12838, 1.457700, 0.809831}, gamma, primitiveScalar), + leftRotationLeftSide = Primitive_2_Conserved( + {1.40086, 0.687774, 0.215124, 0.609161, 1.485660, 1.12838, 1.458735, 0.789960}, gamma, primitiveScalar), + leftRotationRightSide = Primitive_2_Conserved( + {1.40119, 0.687504, 0.330268, 0.334140, 1.486570, 1.12838, 1.588975, 0.475782}, gamma, primitiveScalar), + leftSlowShockLeftSide = Primitive_2_Conserved( + {1.40519, 0.685492, 0.326265, 0.333664, 1.493710, 1.12838, 1.575785, 0.472390}, gamma, primitiveScalar), + leftSlowShockRightSide = Primitive_2_Conserved( + {1.66488, 0.578545, 0.050746, 0.250260, 1.984720, 1.12838, 1.344490, 0.402407}, gamma, primitiveScalar), + contactLeftSide = Primitive_2_Conserved( + {1.65220, 0.578296, 0.049683, 0.249962, 1.981250, 1.12838, 1.346155, 0.402868}, gamma, primitiveScalar), + contactRightSide = Primitive_2_Conserved( + {1.49279, 0.578276, 0.049650, 0.249924, 1.981160, 1.12838, 1.346180, 0.402897}, gamma, primitiveScalar), + rightSlowShockLeftSide = Primitive_2_Conserved( + {1.48581, 0.573195, 0.035338, 0.245592, 1.956320, 1.12838, 1.370395, 0.410220}, gamma, primitiveScalar), + rightSlowShockRightSide = Primitive_2_Conserved( + {1.23813, 0.450361, -0.275532, 0.151746, 1.439000, 1.12838, 1.609775, 0.482762}, gamma, primitiveScalar), + rightRotationLeftSide = 
Primitive_2_Conserved( + {1.23762, 0.450102, -0.274410, 0.145585, 1.437950, 1.12838, 1.606945, 0.493879}, gamma, primitiveScalar), + rightRotationRightSide = Primitive_2_Conserved( + {1.23747, 0.449993, -0.180766, -0.090238, 1.437350, 1.12838, 1.503855, 0.752090}, gamma, primitiveScalar), + rightFastShockLeftSide = Primitive_2_Conserved( + {1.22305, 0.424403, -0.171402, -0.085701, 1.409660, 1.12838, 1.447730, 0.723864}, gamma, primitiveScalar), + rightFastShockRightSide = Primitive_2_Conserved( + {1.00006, 0.000121, -0.000057, -0.000028, 1.000100, 1.12838, 1.128435, 0.564217}, gamma, primitiveScalar), + rightICs = Primitive_2_Conserved({1.0, 0.0, 0.0, 1.0, 0.2, Bx, 4 * coef, 2 * coef}, gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Dai & Woodward state\n" + "Right State: Left Dai & Woodward state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 1.0381971863420549, -1.1459155902616465, -0.63661977236758127, 0, 0.0, + 0, -1.1102230246251565e-16}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Dai & Woodward state\n" + "Right State: Right Dai & Woodward state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0, 0.35915494309189522, -1.2732395447351625, -0.63661977236758127, -0.63661977236758172, + 0.0, 2.2204460492503131e-16, -1.1283791670955123}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + 
std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Dai & Woodward state\n" + "Right State: Right Dai & Woodward state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.17354924587196074, 0.71614983677687327, -1.1940929411768009, + -1.1194725181819352, -0.11432087006939984, 0.0, + 0.056156000248263505, -0.42800560867873094}; + std::vector const scalarFlux{0.19211858644420357, 0.38677506032368902, 0.57540498691841158}; + Real thermalEnergyFlux = 0.24104061926661174; + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Dai & Woodward state\n" + "Right State: Left Dai & Woodward state\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.17354924587196074, 0.71614983677687327, -1.1940929411768009, + -0.14549552299758384, -0.47242308031148195, 0.0, + -0.056156000248263505, -0.55262526758377528}; + std::vector const scalarFlux{-0.19211858644420357, -0.38677506032368902, -0.57540498691841158}; + Real thermalEnergyFlux = -0.24104061926661174; + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + + // Cross wave checks + { + std::string const outputString{ + "Left State: Left of left fast shock\n" + "Right State: Right of left fast shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and 
check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.96813688187727132, 3.0871217875403394, -1.4687093290523414, + -0.33726008721080036, 4.2986213406773457, 0.0, + 0.84684181393860269, -0.087452560407274671}; + std::vector const scalarFlux{1.0717251365527865, 2.157607767226648, 3.2098715673061045}; + Real thermalEnergyFlux = 1.2886155333980993; + std::vector const testFluxes = + Compute_Fluxes(leftFastShockLeftSide, leftFastShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left fast shock\n" + "Right State: Left of left fast shock\n" + "HLLD State: Left Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1.3053938862274184, 2.4685129176021858, -1.181892850065283, + -0.011160487372167127, 5.1797404608257249, 0.0, + 1.1889903073770265, 0.10262704114294516}; + std::vector const scalarFlux{1.4450678072086958, 2.9092249669830292, 4.3280519500627666}; + Real thermalEnergyFlux = 2.081389946702628; + std::vector const testFluxes = + Compute_Fluxes(leftFastShockRightSide, leftFastShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of left rotation/Alfven wave\n" + "Right State: Right of left rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.96326128304298586, 2.8879592118317445, -1.4808188010794987, + -0.20403672861184916, 4.014027751838869, 0.0, + 0.7248753989305099, -0.059178137562467162}; + std::vector const scalarFlux{1.0663278606879119, 2.1467419174572049, 3.1937064501984724}; + Real 
thermalEnergyFlux = 1.5323573637968553; + std::vector const testFluxes = + Compute_Fluxes(leftRotationLeftSide, leftRotationRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left rotation/Alfven wave\n" + "Right State: Left of left rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.96353754504060063, 2.8875487093397085, -1.4327309336053695, + -0.31541343522923493, 3.9739842521208342, 0.0, + 0.75541746728406312, -0.13479771672887678}; + std::vector const scalarFlux{1.0666336820367937, 2.1473576000564334, 3.1946224007710313}; + Real thermalEnergyFlux = 1.5333744977458499; + std::vector const testFluxes = + Compute_Fluxes(leftRotationRightSide, leftRotationLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of left slow shock\n" + "Right State: Right of left slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.88716095730727451, 2.9828594399125663, -1.417062582518549, + -0.21524331343191233, 3.863474778369334, 0.0, + 0.71242370728996041, -0.05229712416644372}; + std::vector const scalarFlux{0.98208498809672407, 1.9771433235295921, 2.9413947405483505}; + Real thermalEnergyFlux = 1.4145715457049737; + std::vector const testFluxes = + Compute_Fluxes(leftSlowShockLeftSide, leftSlowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of left slow shock\n" + "Right State: Left of 
left slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1.042385440439527, 2.7732383399777376, -1.5199872074603551, + -0.21019362664841068, 4.1322001036232585, 0.0, + 0.72170937317481543, -0.049474715634396704}; + std::vector const scalarFlux{1.1539181074575644, 2.323079478570472, 3.4560437166206879}; + Real thermalEnergyFlux = 1.8639570701934713; + std::vector const testFluxes = + Compute_Fluxes(leftSlowShockRightSide, leftSlowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of contact discontinuity\n" + "Right State: Right of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.95545795601418737, 2.8843900822429749, -1.4715039715239722, + -0.21575736014726318, 4.0078718055059257, 0.0, + 0.72241353110189066, -0.049073560388753337}; + std::vector const scalarFlux{1.0576895969443709, 2.1293512784652289, 3.1678344087247892}; + Real thermalEnergyFlux = 1.7186185770667382; + std::vector const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of contact discontinuity\n" + "Right State: Left of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.86324813554422819, 2.8309913324581251, -1.4761428591480787, + -0.23887765947428419, 3.9892942559102793, 0.0, + 0.72244123046603836, -0.049025527032060034}; + std::vector const 
scalarFlux{0.95561355347926669, 1.9238507665182214, 2.8621114407298114}; + Real thermalEnergyFlux = 1.7184928987481187; + std::vector const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of right slow shock\n" + "Right State: Right of right slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.81125524370350677, 2.901639500435365, -1.5141545346789429, + -0.262600896007809, 3.8479660419540087, 0.0, + 0.7218977970017596, -0.049091614519593846}; + std::vector const scalarFlux{0.89805755065482806, 1.8079784457999033, 2.6897282701827465}; + Real thermalEnergyFlux = 1.6022319728249694; + std::vector const testFluxes = + Compute_Fluxes(rightSlowShockLeftSide, rightSlowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of right slow shock\n" + "Right State: Left of right slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.60157947557836688, 2.3888357198399746, -1.9910500022202977, + -0.45610948442354332, 3.5359430988850069, 0.0, + 1.0670963294022622, 0.05554893654378229}; + std::vector const scalarFlux{0.66594699332331575, 1.3406911495770899, 1.994545286188885}; + Real thermalEnergyFlux = 1.0487665253534804; + std::vector const testFluxes = + Compute_Fluxes(rightSlowShockRightSide, rightSlowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left 
State: Left of right rotation/Alfven wave\n" + "Right State: Right of right rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.55701691287884714, 2.4652223621237814, -1.9664615862227277, + -0.47490477894092042, 3.3900659850690529, 0.0, + 1.0325648885587542, 0.059165409025635551}; + std::vector const scalarFlux{0.61661634650230224, 1.2413781978573175, 1.8467974773272691}; + Real thermalEnergyFlux = 0.9707694646266285; + std::vector const testFluxes = + Compute_Fluxes(rightRotationLeftSide, rightRotationRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of right rotation/Alfven wave\n" + "Right State: Left of right rotation/Alfven wave\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.55689116371132596, 2.4648517303940851, -1.7972202655166787, + -0.90018282739798461, 3.3401033852664566, 0.0, + 0.88105841856465605, 0.43911718823267476}; + std::vector const scalarFlux{0.61647714248450702, 1.2410979509359938, 1.8463805541782863}; + Real thermalEnergyFlux = 0.9702629326292449; + std::vector const testFluxes = + Compute_Fluxes(rightRotationRightSide, rightRotationLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of right fast shock\n" + "Right State: Right of right fast shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.48777637414577313, 2.3709438477809708, -1.7282900552525988, + 
-0.86414423547773778, 2.8885015704245069, 0.0, + 0.77133731061645838, 0.38566794697432505}; + std::vector const scalarFlux{0.53996724117661621, 1.0870674521621893, 1.6172294888076189}; + Real thermalEnergyFlux = 0.84330016382608752; + std::vector const testFluxes = + Compute_Fluxes(rightFastShockLeftSide, rightFastShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of right fast shock\n" + "Right State: Left of right fast shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.040639426423817904, 1.0717156491947966, -1.2612066401572222, + -0.63060225433149875, 0.15803727234007203, 0.0, + 0.042555541396817498, 0.021277678888288909}; + std::vector const scalarFlux{0.044987744655527385, 0.090569777630660403, 0.13474059488003065}; + Real thermalEnergyFlux = 0.060961577855018087; + std::vector const testFluxes = + Compute_Fluxes(rightFastShockRightSide, rightFastShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using various states and waves from + * the Ryu & Jones 4d Shock tube + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. 
/ 3.; + Real const Bx = 0.7; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = Primitive_2_Conserved({1.0, 0.0, 0.0, 0.0, 1.0, Bx, 0.0, 0.0}, gamma, primitiveScalar), + hydroRareLeftSide = Primitive_2_Conserved( + {0.990414, 0.012415, 1.458910e-58, 6.294360e-59, 0.984076, Bx, 1.252355e-57, 5.366795e-58}, gamma, + primitiveScalar), + hydroRareRightSide = Primitive_2_Conserved( + {0.939477, 0.079800, 1.557120e-41, 7.505190e-42, 0.901182, Bx, 1.823624e-40, 8.712177e-41}, gamma, + primitiveScalar), + switchOnSlowShockLeftSide = Primitive_2_Conserved( + {0.939863, 0.079142, 1.415730e-02, 7.134030e-03, 0.901820, Bx, 2.519650e-02, 1.290082e-02}, gamma, + primitiveScalar), + switchOnSlowShockRightSide = Primitive_2_Conserved( + {0.651753, 0.322362, 8.070540e-01, 4.425110e-01, 0.490103, Bx, 6.598380e-01, 3.618000e-01}, gamma, + primitiveScalar), + contactLeftSide = Primitive_2_Conserved( + {0.648553, 0.322525, 8.072970e-01, 4.426950e-01, 0.489951, Bx, 6.599295e-01, 3.618910e-01}, gamma, + primitiveScalar), + contactRightSide = Primitive_2_Conserved( + {0.489933, 0.322518, 8.073090e-01, 4.426960e-01, 0.489980, Bx, 6.599195e-01, 3.618850e-01}, gamma, + primitiveScalar), + slowShockLeftSide = Primitive_2_Conserved( + {0.496478, 0.308418, 8.060830e-01, 4.420150e-01, 0.489823, Bx, 6.686695e-01, 3.666915e-01}, gamma, + primitiveScalar), + slowShockRightSide = Primitive_2_Conserved( + {0.298260, -0.016740, 2.372870e-01, 1.287780e-01, 0.198864, Bx, 8.662095e-01, 4.757390e-01}, gamma, + primitiveScalar), + rotationLeftSide = Primitive_2_Conserved( + {0.298001, -0.017358, 2.364790e-01, 1.278540e-01, 0.198448, Bx, 8.669425e-01, 4.750845e-01}, gamma, + primitiveScalar), + rotationRightSide = Primitive_2_Conserved( + {0.297673, -0.018657, 
1.059540e-02, 9.996860e-01, 0.197421, Bx, 9.891580e-01, 1.024949e-04}, gamma, + primitiveScalar), + fastRareLeftSide = Primitive_2_Conserved( + {0.297504, -0.020018, 1.137420e-02, 1.000000e+00, 0.197234, Bx, 9.883860e-01, -4.981931e-17}, gamma, + primitiveScalar), + fastRareRightSide = Primitive_2_Conserved( + {0.299996, -0.000033, 1.855120e-05, 1.000000e+00, 0.199995, Bx, 9.999865e-01, 1.737190e-16}, gamma, + primitiveScalar), + rightICs = Primitive_2_Conserved({0.3, 0.0, 0.0, 1.0, 0.2, Bx, 1.0, 0.0}, gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Ryu & Jones 4d state\n" + "Right State: Left Ryu & Jones 4d state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 0.75499999999999989, 0, 0, 2.2204460492503131e-16, 0.0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Ryu & Jones 4d state\n" + "Right State: Right Ryu & Jones 4d state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -5.5511151231257827e-17, 0.45500000000000013, -0.69999999999999996, -5.5511151231257827e-17, 0, 0.0, 0, + -0.69999999999999996}; + std::vector const scalarFlux{-6.1450707278254418e-17, -1.2371317869019906e-16, -1.8404800947169341e-16}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, 
direction); + } + { + std::string const outputString{ + "Left State: Left Ryu & Jones 4d state\n" + "Right State: Right Ryu & Jones 4d state\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.092428729855986602, 0.53311593977445149, -0.39622049648437296, + -0.21566989083797167, -0.13287876964320211, 0.0, + -0.40407579574102892, -0.21994567048141428}; + std::vector const scalarFlux{0.10231837561464294, 0.20598837745492582, 0.30644876517012837}; + Real thermalEnergyFlux = 0.13864309478397996; + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Ryu & Jones 4d state\n" + "Right State: Left Ryu & Jones 4d state\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.092428729855986602, 0.53311593977445149, -0.39622049648437296, + 0.21566989083797167, 0.13287876964320211, 0.0, + 0.40407579574102892, -0.21994567048141428}; + std::vector const scalarFlux{-0.10231837561464294, -0.20598837745492582, -0.30644876517012837}; + Real thermalEnergyFlux = -0.13864309478397996; + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + + // Cross wave checks + { + std::string const outputString{ + "Left State: Left side of pure hydrodynamic rarefaction\n" + "Right State: Right side of pure hydrodynamic rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.074035256375659553, 
0.66054553664209648, -6.1597070943493028e-41, + -2.9447391900433873e-41, 0.1776649658235645, 0.0, + -6.3466063324344113e-41, -3.0340891384335242e-41}; + std::vector const scalarFlux{0.081956845911157775, 0.16499634214430131, 0.24546494288869905}; + Real thermalEnergyFlux = 0.11034221894046368; + std::vector const testFluxes = Compute_Fluxes(hydroRareLeftSide, hydroRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right side of pure hydrodynamic rarefaction\n" + "Right State: Left side of pure hydrodynamic rarefaction\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.013336890338886076, 0.74071279157971992, -6.1745213352160876e-41, + -2.9474651270630147e-41, 0.033152482405470307, 0.0, + 6.2022392844946449e-41, 2.9606965476795895e-41}; + std::vector const scalarFlux{0.014763904657692993, 0.029722840565719184, 0.044218649135708464}; + Real thermalEnergyFlux = 0.019189877201961154; + std::vector const testFluxes = Compute_Fluxes(hydroRareRightSide, hydroRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of switch on slow shock\n" + "Right State: Right of switch on slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.19734622040826083, 0.47855039640569758, -0.3392293209655618, + -0.18588204716255491, 0.10695446263054809, 0.0, + -0.3558357543098733, -0.19525093130352045}; + std::vector const scalarFlux{0.21846177846784187, 0.43980943806215089, 0.65430419361309078}; + Real thermalEnergyFlux = 0.2840373040888583; + std::vector const 
testFluxes = + Compute_Fluxes(switchOnSlowShockLeftSide, switchOnSlowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of switch on slow shock\n" + "Right State: Left of switch on slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.097593254768855386, 0.76483698872352757, -0.02036438492698419, + -0.010747481940703562, 0.25327551496496836, 0.0, + -0.002520109973016129, -0.00088262199017708799}; + std::vector const scalarFlux{0.10803549193474633, 0.21749813322875222, 0.32357182079044206}; + Real thermalEnergyFlux = 0.1100817647375162; + std::vector const testFluxes = + Compute_Fluxes(switchOnSlowShockRightSide, switchOnSlowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of contact discontinuity\n" + "Right State: Right of contact discontinuity\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.2091677440314007, 0.5956612619664029, -0.29309091669513981, + -0.16072556008504282, 0.19220050968424285, 0.0, + -0.35226977371803297, -0.19316940226499904}; + std::vector const scalarFlux{0.23154817591476573, 0.46615510432814616, 0.69349862290347741}; + Real thermalEnergyFlux = 0.23702444986592192; + std::vector const testFluxes = Compute_Fluxes(contactLeftSide, contactRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of contact discontinuity\n" + "Right State: Left of contact discontinuity\n" + 
"HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.15801775068597168, 0.57916072367837657, -0.33437339604094024, + -0.18336617461176744, 0.16789791355547545, 0.0, + -0.3522739911439669, -0.19317084712861482}; + std::vector const scalarFlux{0.17492525964231936, 0.35216128279157616, 0.52391009427617696}; + Real thermalEnergyFlux = 0.23704936434506069; + std::vector const testFluxes = Compute_Fluxes(contactRightSide, contactLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of slow shock\n" + "Right State: Right of slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.11744487326715558, 0.66868230621718128, -0.35832022960458892, + -0.19650694834641164, 0.057880816021092185, 0.0, + -0.37198011453582402, -0.20397277844271294}; + std::vector const scalarFlux{0.13001118457092631, 0.26173981750473918, 0.38939014356639379}; + Real thermalEnergyFlux = 0.1738058891582446; + std::vector const testFluxes = Compute_Fluxes(slowShockLeftSide, slowShockRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of slow shock\n" + "Right State: Left of slow shock\n" + "HLLD State: Left Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0.038440990187426027, 0.33776683678923869, -0.62583241538732792, + -0.3437911783906169, -0.13471828103488348, 0.0, + -0.15165427985881363, -0.082233932588833825}; + std::vector const scalarFlux{0.042554081172858457, 0.085670301959209896, 
0.12745164834795927}; + Real thermalEnergyFlux = 0.038445630017261548; + std::vector const testFluxes = Compute_Fluxes(slowShockRightSide, slowShockLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left of rotation/Alfven wave\n" + "Right State: Right of rotation/Alfven wave\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.0052668366104996478, 0.44242247672452317, -0.60785196341731951, + -0.33352435102145184, -0.21197843894720192, 0.0, + -0.18030635192654354, -0.098381113757603278}; + std::vector const scalarFlux{-0.0058303751166299484, -0.011737769516117116, -0.017462271505355991}; + Real thermalEnergyFlux = -0.0052395622905745485; + std::vector const testFluxes = Compute_Fluxes(rotationLeftSide, rotationRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of rotation/Alfven wave\n" + "Right State: Left of rotation/Alfven wave\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.005459628948343731, 0.4415038084184626, -0.69273580053867279, + -0.0051834737482743809, -0.037389286119015486, 0.0, + -0.026148289294373184, -0.69914753968916865}; + std::vector const scalarFlux{-0.0060437957583491572, -0.012167430087241717, -0.018101477236719343}; + Real thermalEnergyFlux = -0.0054536013916442853; + std::vector const testFluxes = Compute_Fluxes(rotationRightSide, rotationLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: 
Left of fast rarefaction\n" + "Right State: Right of fast rarefaction\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-0.0059354802028144249, 0.44075681881443612, -0.69194176811725872, + -0.0059354802028144804, -0.040194357552219451, 0.0, + -0.027710302430178135, -0.70000000000000007}; + std::vector const scalarFlux{-0.0065705619215052757, -0.013227920997059845, -0.019679168822056604}; + Real thermalEnergyFlux = -0.0059354109546219782; + std::vector const testFluxes = Compute_Fluxes(fastRareLeftSide, fastRareRightSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right of fast rarefaction\n" + "Right State: Left of fast rarefaction\n" + "HLLD State: Right Double Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-3.0171858819483255e-05, 0.45503057873272706, -0.69998654276213712, + -3.0171858819427744e-05, -0.00014827469339251387, 0.0, + -8.2898844654399895e-05, -0.69999999999999984}; + std::vector const scalarFlux{-3.340017317660794e-05, -6.7241562798797897e-05, -0.00010003522597924373}; + Real thermalEnergyFlux = -3.000421709818028e-05; + std::vector const testFluxes = Compute_Fluxes(fastRareRightSide, fastRareLeftSide, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the HLLD Riemann Solver using various states and waves from + * the Einfeldt Strong Rarefaction (EFR) + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + Real const V0 = 2.; + Real const Vy = 0.0; + Real const Vz = 0.0; + Real const Bx = 0.0; + Real const Bz = 0.0; + + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + leftICs = Primitive_2_Conserved({1.0, -V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar), + leftRarefactionCenter = + Primitive_2_Conserved({0.368580, -1.180830, Vy, Vz, 0.111253, Bx, 0.183044, Bz}, gamma, primitiveScalar), + leftVxTurnOver = + Primitive_2_Conserved({0.058814, -0.125475, Vy, Vz, 0.008819, Bx, 0.029215, Bz}, gamma, primitiveScalar), + midPoint = + Primitive_2_Conserved({0.034658, 0.000778, Vy, Vz, 0.006776, Bx, 0.017333, Bz}, gamma, primitiveScalar), + rightVxTurnOver = + Primitive_2_Conserved({0.062587, 0.152160, Vy, Vz, 0.009521, Bx, 0.031576, Bz}, gamma, primitiveScalar), + rightRarefactionCenter = + Primitive_2_Conserved({0.316485, 1.073560, Vy, Vz, 0.089875, Bx, 0.159366, Bz}, gamma, primitiveScalar), + rightICs = Primitive_2_Conserved({1.0, V0, Vy, Vz, 0.45, Bx, 0.5, Bz}, gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + // Initial Condition Checks + { + std::string const outputString{ + "Left State: Left Einfeldt Strong Rarefaction state\n" + "Right State: Left Einfeldt Strong Rarefaction state\n" + "HLLD State: Right"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; + std::vector const 
scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; + Real thermalEnergyFlux = -1.3499999999999996; + std::vector const testFluxes = Compute_Fluxes(leftICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Einfeldt Strong Rarefaction state\n" + "Right State: Right Einfeldt Strong Rarefaction state\n" + "HLLD State: Left"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; + std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; + Real thermalEnergyFlux = 1.3499999999999996; + std::vector const testFluxes = Compute_Fluxes(rightICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Einfeldt Strong Rarefaction state\n" + "Right State: Right Einfeldt Strong Rarefaction state\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, -1.4249999999999998, -0, -0, 0, 0.0, 0, -0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(leftICs, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Einfeldt Strong Rarefaction state\n" + "Right State: Left Einfeldt Strong Rarefaction state\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 10.574999999999999, 0, 0, 0, 0.0, 0, 0}; + std::vector const 
scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.0; + std::vector const testFluxes = Compute_Fluxes(rightICs, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + + // Intermediate state checks + { + std::string const outputString{ + "Left State: Left Einfeldt Strong Rarefaction state\n" + "Right State: Left rarefaction center\n" + "HLLD State: Right"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.43523032140000006, 0.64193857338676208, -0, -0, -0.67142479846795033, 0.0, -0.21614384652000002, -0}; + std::vector const scalarFlux{-0.48179889059681413, -0.9699623468164007, -1.4430123054318851}; + Real thermalEnergyFlux = -0.19705631998499995; + std::vector const testFluxes = Compute_Fluxes(leftICs, leftRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left rarefaction center\n" + "Right State: Left Einfeldt Strong Rarefaction state\n" + "HLLD State: Right"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-2, 4.5750000000000002, -0, -0, -6.75, 0.0, -1, -0}; + std::vector const scalarFlux{-2.2139950592000002, -4.4572370036000004, -6.6310283749999996}; + Real thermalEnergyFlux = -1.3499999999999996; + std::vector const testFluxes = Compute_Fluxes(leftRarefactionCenter, leftICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left rarefaction center\n" + "Right State: Left Vx turnover point\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 
-0.023176056428381629, -2.0437812714100764e-05, 0, 0, -0.00098843768795337005, 0.0, -0.011512369309265979, 0}; + std::vector const scalarFlux{-0.025655837212088663, -0.051650588155052128, -0.076840543898599858}; + Real thermalEnergyFlux = -0.0052127803322822184; + std::vector const testFluxes = Compute_Fluxes(leftRarefactionCenter, leftVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Vx turnover point\n" + "Right State: Left rarefaction center\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.43613091609689758, 0.64135749005731213, 0, 0, -0.67086080671260462, 0.0, -0.21659109937066717, 0}; + std::vector const scalarFlux{-0.48279584670145054, -0.9719694288205295, -1.445998239926636}; + Real thermalEnergyFlux = -0.19746407621898149; + std::vector const testFluxes = Compute_Fluxes(leftVxTurnOver, leftRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Left Vx turnover point\n" + "Right State: Midpoint\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.0011656375857387598, 0.0062355370788444902, 0, 0, -0.00055517615333601446, 0.0, -0.0005829533231464588, 0}; + std::vector const scalarFlux{-0.0012903579278217153, -0.0025977614899708843, -0.0038646879530001054}; + Real thermalEnergyFlux = -0.00034184143405415065; + std::vector const testFluxes = Compute_Fluxes(leftVxTurnOver, midPoint, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: 
Midpoint\n" + "Right State: Left Vx turnover point\n" + "HLLD State: Right Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -0.0068097924351817191, 0.010501781004354172, 0, 0, -0.0027509360975397175, 0.0, -0.0033826654536986789, 0}; + std::vector const scalarFlux{-0.0075384234028349319, -0.015176429414463658, -0.022577963432775162}; + Real thermalEnergyFlux = -0.001531664896602873; + std::vector const testFluxes = Compute_Fluxes(midPoint, leftVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Midpoint\n" + "Right State: Right Vx turnover point\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.0013952100758668729, 0.0061359407125797273, 0, 0, 0.00065984543596031629, 0.0, 0.00069776606396793105, 0}; + std::vector const scalarFlux{0.001544494107257657, 0.0031093909889746947, 0.0046258388010795683}; + Real thermalEnergyFlux = 0.00040916715364737997; + std::vector const testFluxes = Compute_Fluxes(midPoint, rightVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Vx turnover point\n" + "Right State: Midpoint\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.0090024688079190333, 0.011769373146023688, 0, 0, 0.003725251767222792, 0.0, 0.0045418689996141555, 0}; + std::vector const scalarFlux{0.0099657107306674268, 0.020063068547205749, 0.029847813055181766}; + Real thermalEnergyFlux = 0.0020542406295284269; + std::vector const testFluxes = Compute_Fluxes(rightVxTurnOver, midPoint, 
gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Vx turnover point\n" + "Right State: Right rarefaction center\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.023310393229073981, 0.0033086897645311728, 0, 0, 0.0034208520409618887, 0.0, 0.011760413130542123, 0}; + std::vector const scalarFlux{0.025804547718589466, 0.051949973634547723, 0.077285939467198722}; + Real thermalEnergyFlux = 0.0053191138878843835; + std::vector const testFluxes = Compute_Fluxes(rightVxTurnOver, rightRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right rarefaction center\n" + "Right State: Right Vx turnover point\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.33914253809565298, 0.46770133685446141, 0, 0, 0.46453338019960133, 0.0, 0.17077520175095764, 0}; + std::vector const scalarFlux{0.37542995185416178, 0.75581933514738364, 1.1244318966408966}; + Real thermalEnergyFlux = 0.1444638874418068; + std::vector const testFluxes = Compute_Fluxes(rightRarefactionCenter, rightVxTurnOver, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right rarefaction center\n" + "Right State: Right Einfeldt Strong Rarefaction state\n" + "HLLD State: Left"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + 0.33976563660000003, 0.46733255780629601, 0, 0, 0.46427650313257612, 0.0, 0.17108896296000001, 
0}; + std::vector const scalarFlux{0.37611972035917141, 0.75720798400261535, 1.1264977885722693}; + Real thermalEnergyFlux = 0.14472930749999999; + std::vector const testFluxes = Compute_Fluxes(rightRarefactionCenter, rightICs, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Right Einfeldt Strong Rarefaction state\n" + "Right State: Right rarefaction center\n" + "HLLD State: Left"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{2, 4.5750000000000002, 0, 0, 6.75, 0.0, 1, 0}; + std::vector const scalarFlux{2.2139950592000002, 4.4572370036000004, 6.6310283749999996}; + Real thermalEnergyFlux = 1.3499999999999996; + std::vector const testFluxes = Compute_Fluxes(rightICs, rightRarefactionCenter, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver using the constant states from the + * examples in cholla/examples/3D + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, ConstantStatesExpectCorrectFlux) +{ + // Constant Values + Real const gamma = 5. 
/ 3.; + + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // States + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + zeroMagneticField = + Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 0.0, 0.0, 0.0}, gamma, primitiveScalar), + onesMagneticField = + Primitive_2_Conserved({1e4, 0.0, 0.0, 0.0, 1.380658E-5, 1.0, 1.0, 1.0}, gamma, primitiveScalar); + + for (size_t direction = 0; direction < 3; direction++) { + { + std::string const outputString{ + "Left State: Constant state, zero magnetic field\n" + "Right State: Constant state, zero magnetic field\n" + "HLLD State: Left Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{0, 1.380658e-05, 0, 0, 0, 0, 0, 0}; + std::vector const scalarFlux{0, 0, 0}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = Compute_Fluxes(zeroMagneticField, zeroMagneticField, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Constant state, ones magnetic field\n" + "Right State: Constant state, ones magnetic field\n" + "HLLD State: Left Double Star"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{ + -1.42108547152020037174e-14, 0.50001380657999994, -1, -1, -1.7347234759768071e-18, 0.0, + 3.4694469519536142e-18, 3.4694469519536142e-18}; + std::vector const scalarFlux{1.5731381063233131e-14, 3.1670573744690958e-14, 4.7116290424753513e-14}; + Real thermalEnergyFlux = 0.; + std::vector const testFluxes = Compute_Fluxes(onesMagneticField, onesMagneticField, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, 
direction); + } + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the HLLD Riemann Solver with the degenerate state + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, DegenerateStateCorrectInputExpectCorrectOutput) +{ + // Constant Values + Real const gamma = 5. / 3.; + std::vector const primitiveScalar{1.1069975296, 2.2286185018, 3.3155141875}; + + // State + std::vector const // | Density | X-Velocity | Y-Velocity | Z-Velocity | + // Pressure | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive + // Scalars | + state = Primitive_2_Conserved({1.0, 1.0, 1.0, 1.0, 1.0, 3.0E4, 1.0, 1.0}, gamma, primitiveScalar); + + std::vector const fiducialFlux{1, -449999997, -29999, -29999, -59994, 0.0, -29999, -29999}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; + Real thermalEnergyFlux = 1.5; + std::string const outputString{ + "Left State: Degenerate state\n" + "Right State: Degenerate state\n" + "HLLD State: Left Double Star State"}; + + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + // If you run into issues with the energy try 0.001953125 instead. + // That's what I got when running the Athena solver on its own. Running + // the Athena solver with theses tests gave me -0.00080700946455175148 + // though + for (size_t direction = 0; direction < 3; direction++) { + std::vector const testFluxes = Compute_Fluxes(state, state, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the HLLD Riemann Solver with all zeroes + * + */ +TEST_F(tMHDCalculateHLLDFluxesCUDA, AllZeroesExpectAllZeroes) +{ + // Constant Values + Real const gamma = 5. / 3.; + + // State + size_t numElements = 8; + #ifdef SCALAR + numElements += 3; + #endif // SCALAR + + std::vector const state(numElements, 0.0); + std::vector const fiducialFlux(8, 0.0); + std::vector const scalarFlux(3, 0.0); + Real thermalEnergyFlux = 0.0; + + std::string const outputString{ + "Left State: All zeroes\n" + "Right State: All zeroes\n" + "HLLD State: Right Star State"}; + + for (size_t direction = 0; direction < 3; direction++) { + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const testFluxes = Compute_Fluxes(state, state, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } +} +// ========================================================================= + +// ========================================================================= +/*! +* \brief Test the HLLD Riemann Solver with negative pressure, energy, and + density. +* +*/ +TEST_F(tMHDCalculateHLLDFluxesCUDA, UnphysicalValuesExpectAutomaticFix) +{ + // Constant Values + Real const gamma = 5. 
/ 3.; + + // States + std::vector // | Density | X-Momentum | Y-Momentum | Z-Momentum | + // Energy | X-Magnetic Field | Y-Magnetic Field | + // Z-Magnetic Field | Adiabatic Index | Passive Scalars | + negativePressure = {1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0}, + negativeEnergy = {1.0, 1.0, 1.0, 1.0, -(5 - gamma), 1.0, 1.0, 1.0}, + negativeDensity = {-1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, + negativeDensityEnergyPressure = {-1.0, -1.0, -1.0, -1.0, -gamma, 1.0, 1.0, 1.0}, + negativeDensityPressure = {-1.0, 1.0, 1.0, 1.0, -1.0, 1.0, 1.0, 1.0}; + + #ifdef SCALAR + std::vector const conservedScalar{1.1069975296, 2.2286185018, 3.3155141875}; + negativePressure.insert(negativePressure.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeEnergy.insert(negativeEnergy.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensity.insert(negativeDensity.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensityEnergyPressure.insert(negativeDensityEnergyPressure.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + negativeDensityPressure.insert(negativeDensityPressure.begin() + 5, conservedScalar.begin(), + conservedScalar.begin() + grid_enum::nscalars); + #endif // SCALAR + #ifdef DE + negativePressure.push_back(mhd::utils::computeThermalEnergy( + negativePressure.at(4), negativePressure.at(0), negativePressure.at(1), negativePressure.at(2), + negativePressure.at(3), negativePressure.at(grid_enum::magnetic_x), negativePressure.at(grid_enum::magnetic_y), + negativePressure.at(grid_enum::magnetic_z), gamma)); + negativeEnergy.push_back(mhd::utils::computeThermalEnergy( + negativeEnergy.at(4), negativeEnergy.at(0), negativeEnergy.at(1), negativeEnergy.at(2), negativeEnergy.at(3), + negativeEnergy.at(grid_enum::magnetic_x), negativeEnergy.at(grid_enum::magnetic_y), + 
negativeEnergy.at(grid_enum::magnetic_z), gamma)); + negativeDensity.push_back(mhd::utils::computeThermalEnergy( + negativeDensity.at(4), negativeDensity.at(0), negativeDensity.at(1), negativeDensity.at(2), negativeDensity.at(3), + negativeDensity.at(grid_enum::magnetic_x), negativeDensity.at(grid_enum::magnetic_y), + negativeDensity.at(grid_enum::magnetic_z), gamma)); + negativeDensityEnergyPressure.push_back(mhd::utils::computeThermalEnergy( + negativeDensityEnergyPressure.at(4), negativeDensityEnergyPressure.at(0), negativeDensityEnergyPressure.at(1), + negativeDensityEnergyPressure.at(2), negativeDensityEnergyPressure.at(3), + negativeDensityEnergyPressure.at(grid_enum::magnetic_x), negativeDensityEnergyPressure.at(grid_enum::magnetic_y), + negativeDensityEnergyPressure.at(grid_enum::magnetic_z), gamma)); + negativeDensityPressure.push_back(mhd::utils::computeThermalEnergy( + negativeDensityPressure.at(4), negativeDensityPressure.at(0), negativeDensityPressure.at(1), + negativeDensityPressure.at(2), negativeDensityPressure.at(3), negativeDensityPressure.at(grid_enum::magnetic_x), + negativeDensityPressure.at(grid_enum::magnetic_y), negativeDensityPressure.at(grid_enum::magnetic_z), gamma)); + #endif // DE + + for (size_t direction = 0; direction < 3; direction++) { + { + std::string const outputString{ + "Left State: Negative Pressure\n" + "Right State: Negative Pressure\n" + "HLLD State: Left Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1.5, 0, 0, -1.6254793235168146e-16, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; + Real thermalEnergyFlux = -1.5; + std::vector const testFluxes = Compute_Fluxes(negativePressure, negativePressure, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: 
Negative Energy\n" + "Right State: Negative Energy\n" + "HLLD State: Left Star State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1.5, 0, 0, -1.5, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000001, 2.2286185018000002, 3.3155141874999998}; + Real thermalEnergyFlux = -6.333333333333333; + std::vector const testFluxes = Compute_Fluxes(negativeEnergy, negativeEnergy, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Negative Density\n" + "Right State: Negative Density\n" + "HLLD State: Left State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1E+20, 1e+20, 1e+20, -5e+19, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; + Real thermalEnergyFlux = -1.5000000000000001e+40; + std::vector const testFluxes = Compute_Fluxes(negativeDensity, negativeDensity, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + { + std::string const outputString{ + "Left State: Negative Density, Energy, and Pressure\n" + "Right State: Negative Density, Energy, and Pressure\n" + "HLLD State: Right State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{-1, 1E+20, 1E+20, 1E+20, 1.5E+20, 0, 0, 0}; + std::vector const scalarFlux{-1.1069975296000002e+20, -2.2286185018000002e+20, -3.3155141874999997e+20}; + Real thermalEnergyFlux = 1.5000000000000001e+40; + std::vector const testFluxes = + Compute_Fluxes(negativeDensityEnergyPressure, negativeDensityEnergyPressure, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, 
direction); + } + { + std::string const outputString{ + "Left State: Negative Density and Pressure\n" + "Right State: Negative Density and Pressure\n" + "HLLD State: Left State"}; + // Compute the fluxes and check for correctness + // Order of Fluxes is rho, vec(V), E, vec(B) + std::vector const fiducialFlux{1, 1e+20, 1e+20, 1e+20, -1.5e+20, 0, 0, 0}; + std::vector const scalarFlux{1.1069975296000002e+20, 2.2286185018000002e+20, 3.3155141874999997e+20}; + Real thermalEnergyFlux = -1.5000000000000001e+40; + std::vector const testFluxes = + Compute_Fluxes(negativeDensityPressure, negativeDensityPressure, gamma, direction); + Check_Results(fiducialFlux, scalarFlux, thermalEnergyFlux, testFluxes, outputString, direction); + } + } +} +// ========================================================================= + +// ========================================================================= +// End of integration tests for the entire HLLD solver. Unit tests are below +// ========================================================================= + +// ========================================================================= +// Unit tests for the contents of the mhd::internal namespace +// ========================================================================= +/*! + * \brief A struct to hold some basic test values + * + */ +namespace +{ +struct TestParams { + // List of cases + std::vector names{"Case 1", "Case 2"}; + + double const gamma = 5. 
/ 3.; + + std::vector const magneticX{92.75101068883114, 31.588767769990532}; + + std::vector stateLVec{ + {21.50306776645775, 1.7906564444824999, 0.33040135813215948, 1.500111692877206, 65.751208381099417, + 12.297499156516622, 46.224045698787776, 9.9999999999999995e-21, 5445.3204350339083}, + {48.316634031589935, 0.39291118391272883, 0.69876195899931859, 1.8528943583250035, 38.461354599479826, + 63.744719695704063, 37.703264551707541, 9.9999999999999995e-21, 3241.38784808316}}, + stateRVec{{81.121773176226498, 0.10110493143718589, 0.17103629446142521, 0.41731155351794952, 18.88982523270516, + 84.991914178754897, 34.852095153095384, 9.9999999999999995e-21, 8605.4286125143772}, + {91.029557388536347, 0.93649399297774782, 0.36277769000180521, 0.095181318599791204, 83.656397841788944, + 35.910258841630984, 24.052685003977757, 9.9999999999999995e-21, 4491.7524579462979}}; + + std::vector const starStateLVec{ + {28.520995251761526, 1.5746306813243216, 1.3948193325212686, 6.579867455284738, 62.093488291430653, + 62.765890944643196}, + {54.721668215064945, 1.4363926014039052, 1.1515754515491903, 30.450436649083692, 54.279167444036723, + 93.267654555096414}}, + starStateRVec{{49.090695707386047, 1.0519818825796206, 0.68198273634686157, 90.44484278669114, 26.835645069149873, + 7.4302316959173442}, + {72.680005044606091, 0.61418047569879897, 0.71813570322922715, 61.33664731346812, + 98.974446283273181, 10.696380763901459}}; + + std::vector totalPressureStar{66.80958736783934, 72.29644038317676}; + + std::vector const DoubleStarStateVec{ + {0.79104271107837087, 0.97609103551927523, 20.943239839455895, 83.380243826880701, 45.832024557076693, + std::nan("0")}, + {1.390870320696683, 0.52222643241336986, 83.851481048702098, 80.366712517307832, 55.455301414557297, + std::nan("0")}}; + + std::vector const flux{ + {12.939239309626116, 65.054814649176265, 73.676928455867824, 16.873647595664387, 52.718887319724693, + 58.989284454159673, 29.976925743532302}, + {81.715245865170729, 
56.098850697078028, 2.7172469834037871, 39.701329831928732, 81.63926176158796, + 57.043444592213589, 97.733298271413588}}, + starFlux{{0, 74.90125547448865, 16.989138610622945, 38.541822734846185, 19.095105176247017, 96.239645266242775, + 86.225169282683467}, + {0, 26.812722601652684, 48.349566649914976, 61.228439610525378, 45.432249733131123, 33.053375365947957, + 15.621020824107379}}; + + std::vector const speed{ + {-22.40376497145191, -19.710500632936679, -0.81760587897407833, 9.6740190040662242, 24.295526347371595}, + {-11.190385012513822, -4.4880642018724357, -0.026643804611559244, 3.4191202933087519, 12.519790189404299}}; + + TestParams() = default; +}; +} // namespace +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::internal::approximateLRWaveSpeeds function + * + */ +TEST(tMHDHlldInternalApproximateLRWaveSpeeds, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + std::vector const fiducialSpeedL{-22.40376497145191, -11.190385012513822}; + std::vector const fiducialSpeedR{24.295526347371595, 12.519790189404299}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::Speeds testSpeed = mhd::internal::approximateLRWaveSpeeds( + parameters.stateLVec.at(i), parameters.stateRVec.at(i), parameters.magneticX.at(i), parameters.gamma); + + // Now check results + testing_utilities::Check_Results(fiducialSpeedL[i], testSpeed.L, parameters.names.at(i) + ", SpeedL"); + testing_utilities::Check_Results(fiducialSpeedR.at(i), testSpeed.R, parameters.names.at(i) + ", SpeedR"); + } +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the mhd::internal::approximateMiddleWaveSpeed function + * + */ +TEST(tMHDHlldInternalApproximateMiddleWaveSpeed, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector const fiducialSpeedM{-0.81760587897407833, -0.026643804611559244}; + + mhd::internal::Speeds testSpeed; + + for (size_t i = 0; i < parameters.names.size(); i++) { + testSpeed.M = mhd::internal::approximateMiddleWaveSpeed(parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialSpeedM.at(i), testSpeed.M, parameters.names.at(i) + ", SpeedM"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::internal::approximateStarWaveSpeed function + * + */ +TEST(tMHDHlldInternalApproximateStarWaveSpeed, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + std::vector const fiducialSpeedStarL{-18.18506608966894, -4.2968910457518161}; + std::vector const fiducialSpeedStarR{12.420292938368167, 3.6786718447209252}; + + mhd::internal::Speeds testSpeed; + + for (size_t i = 0; i < parameters.names.size(); i++) { + testSpeed.LStar = mhd::internal::approximateStarWaveSpeed(parameters.starStateLVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), -1); + testSpeed.RStar = mhd::internal::approximateStarWaveSpeed(parameters.starStateRVec.at(i), parameters.speed.at(i), + parameters.magneticX.at(i), 1); + + // Now check results + testing_utilities::Check_Results(fiducialSpeedStarL.at(i), testSpeed.LStar, + parameters.names.at(i) + ", SpeedStarL"); + testing_utilities::Check_Results(fiducialSpeedStarR.at(i), testSpeed.RStar, + parameters.names.at(i) + ", SpeedStarR"); + } +} +// ========================================================================= + +// 
========================================================================= +/*! + * \brief Test the mhd::internal::_nonStarFluxes function + * + */ +TEST(tMHDHlldInternalNonStarFluxes, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector fiducialFlux{ + {38.504606872151484, -3088.4810263278778, -1127.8835013070616, -4229.5657456907293, -12344.460641662206, + -8.6244637840856555, -56.365490339906408}, + {18.984145880030045, 2250.9966820900618, -2000.3517480656785, -1155.8240512956793, -2717.2127176227905, + 2.9729840344910059, -43.716615275067923}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::Flux testFlux = mhd::internal::nonStarFluxes(parameters.stateLVec.at(i), parameters.magneticX.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityFlux"); + testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumFluxX"); + testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumFluxZ"); + testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticFluxZ"); + testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, parameters.names.at(i) + ", EnergyFlux"); + } +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the mhd::internal::computeStarState function in the + * non-degenerate case + * + */ +TEST(tMHDHlldInternalComputeStarState, CorrectInputNonDegenerateExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector fiducialStarState{{24.101290139122913, 1.4626377138501221, 5.7559806612277464, + 1023.8840191068900, 18.648382121236992, 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, + 172.06840532772659, 66.595692901872582, 39.389537509454122}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::StarState testStarState = + mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, + parameters.names.at(i) + ", VelocityStarY"); + testing_utilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, + parameters.names.at(i) + ", VelocityStarZ"); + testing_utilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy, + parameters.names.at(i) + ", EnergyStar"); + testing_utilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY, + parameters.names.at(i) + ", MagneticStarY"); + testing_utilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, + parameters.names.at(i) + ", MagneticStarZ"); + } +} + +/*! 
+ * \brief Test the mhd::internal::starFluxes function in the non-degenerate + * case + * + */ +TEST(tMHDHlldInternalStarFluxes, CorrectInputNonDegenerateExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector fiducialFlux{ + {-45.270724071132321, 1369.1771532285088, -556.91765728768155, -2368.4452742393819, -21413.063415617500, + -83.294404848633300, -504.84138754248409}, + {61.395380340435793, 283.48596932136809, -101.75517013858293, -51.34364892516212, -1413.4750762739586, + 25.139956754826922, 78.863254638038882}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::StarState testStarState = + mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); + + mhd::internal::Flux testFlux = + mhd::internal::starFluxes(testStarState, parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); + + // Now check results + testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); + testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY", 1.0E-13); + testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ", 7.0E-13); + } +} + +/*! 
+ * \brief Test the mhd::internal::starFluxes function in the degenerate + * case + * + */ +TEST(tMHDHlldInternalComputeStarState, CorrectInputDegenerateExpectCorrectOutput) +{ + TestParams parameters; + + std::vector fiducialStarState{ + {24.101290139122913, 1.4626377138501221, 5.7559806612277464, 4.5171065808847731e+17, 18.648382121236992, + 70.095850905078336}, + {50.132466596958501, 0.85967712862308099, 1.9480712959548112, 172.06840532772659, 66.595692901872582, + 39.389537509454122}}; + + // Used to get us into the degenerate case + double const totalPressureStarMultiplier = 1E15; + parameters.stateLVec.at(0).totalPressure *= totalPressureStarMultiplier; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::StarState testStarState = + mhd::internal::computeStarState(parameters.stateLVec.at(i), parameters.speed.at(i), parameters.speed.at(i).L, + parameters.magneticX.at(i), parameters.totalPressureStar.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialStarState.at(i).velocityY, testStarState.velocityY, + parameters.names.at(i) + ", VelocityStarY"); + testing_utilities::Check_Results(fiducialStarState.at(i).velocityZ, testStarState.velocityZ, + parameters.names.at(i) + ", VelocityStarZ"); + testing_utilities::Check_Results(fiducialStarState.at(i).energy, testStarState.energy, + parameters.names.at(i) + ", EnergyStar"); + testing_utilities::Check_Results(fiducialStarState.at(i).magneticY, testStarState.magneticY, + parameters.names.at(i) + ", MagneticStarY"); + testing_utilities::Check_Results(fiducialStarState.at(i).magneticZ, testStarState.magneticZ, + parameters.names.at(i) + ", MagneticStarZ"); + } +} + +TEST(tMHDHlldInternalStarFluxes, CorrectInputDegenerateExpectCorrectOutput) +{ + TestParams parameters; + + // Used to get us into the degenerate case + double const totalPressureStarMultiplier = 1E15; + + std::vector fiducialFlux{ + {-144.2887586578122, 1450.1348804310369, -773.30617492819886, 
-151.70644305354989, 1378.3797024673304, + -1056.6283526454272, -340.62268733874163}, + {10.040447333773272, 284.85426012223729, -499.05932057162761, 336.35271628090368, 171.28451793017882, + 162.96661864443826, -524.05361885198215}}; + + parameters.totalPressureStar.at(0) *= totalPressureStarMultiplier; + parameters.totalPressureStar.at(1) *= totalPressureStarMultiplier; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::Flux testFlux = + mhd::internal::starFluxes(parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), + parameters.speed.at(i), parameters.speed.at(i).L); + + // Now check results + testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux"); + testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); + testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::internal::computeDoubleStarState function. 
+ * Non-degenerate state + * + */ +TEST(tMHDHlldInternalDoubleStarState, CorrectInputNonDegenerateExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector fiducialState{ + {-1.5775383335759607, -3.4914062207842482, 45.259313435283325, 36.670978215630669, -2048.1953674500523, + 1721.0582276783819}, + {3.803188977150934, -4.2662645349592765, 71.787329583230417, 53.189673238238178, -999.79694164635089, + 252.047167522579}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::DoubleStarState const testState = mhd::internal::computeDoubleStarState( + parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), parameters.magneticX.at(i), + parameters.totalPressureStar.at(i), parameters.speed.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testing_utilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testing_utilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testing_utilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testing_utilities::Check_Results(fiducialState.at(i).energyL, testState.energyL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testing_utilities::Check_Results(fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); + } +} + +/*! + * \brief Test the mhd::internal::computeDoubleStarState function in the + * degenerate state. 
+ * + */ +TEST(tMHDHlldInternalDoubleStarState, CorrectInputDegenerateExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector fiducialState{ + {1.0519818825796206, 0.68198273634686157, 26.835645069149873, 7.4302316959173442, 0.0, 90.44484278669114}, + {0.61418047569879897, 0.71813570322922715, 98.974446283273181, 10.696380763901459, 0.0, 61.33664731346812}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::DoubleStarState const testState = + mhd::internal::computeDoubleStarState(parameters.starStateLVec.at(i), parameters.starStateRVec.at(i), 0.0, + parameters.totalPressureStar.at(i), parameters.speed.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialState.at(i).velocityY, testState.velocityY, + parameters.names.at(i) + ", VelocityDoubleStarY"); + testing_utilities::Check_Results(fiducialState.at(i).velocityZ, testState.velocityZ, + parameters.names.at(i) + ", VelocityDoubleStarZ"); + testing_utilities::Check_Results(fiducialState.at(i).magneticY, testState.magneticY, + parameters.names.at(i) + ", MagneticDoubleStarY"); + testing_utilities::Check_Results(fiducialState.at(i).magneticZ, testState.magneticZ, + parameters.names.at(i) + ", MagneticDoubleStarZ"); + testing_utilities::Check_Results(fiducialState.at(i).energyL, testState.energyL, + parameters.names.at(i) + ", EnergyDoubleStarL"); + testing_utilities::Check_Results(fiducialState.at(i).energyR, testState.energyR, + parameters.names.at(i) + ", EnergyDoubleStarR"); + } +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the mhd::internal::_doubleStarFluxes function + * + */ +TEST(tMHDHlldInternalDoubleStarFluxes, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector const fiducialFlux{ + {-144.2887586578122, 1450.1348804310369, -332.80193639987715, 83.687152337186944, 604.70003506833029, + -245.53635448727721, -746.94190287166407}, + {10.040447333773258, 284.85426012223729, -487.87930516727664, 490.91728596722157, 59.061079503595295, + 30.244176588794346, -466.15336272175193}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + mhd::internal::Flux const testFlux = mhd::internal::computeDoubleStarFluxes( + parameters.DoubleStarStateVec.at(i), parameters.DoubleStarStateVec.at(i).energyL, + parameters.starStateLVec.at(i), parameters.stateLVec.at(i), parameters.flux.at(i), parameters.speed.at(i), + parameters.speed.at(i).L, parameters.speed.at(i).LStar); + + // Now check results + testing_utilities::Check_Results(fiducialFlux[i].density, testFlux.density, + parameters.names.at(i) + ", DensityStarFlux", 5.0E-14); + testing_utilities::Check_Results(fiducialFlux[i].momentumX, testFlux.momentumX, + parameters.names.at(i) + ", MomentumStarFluxX"); + testing_utilities::Check_Results(fiducialFlux[i].momentumY, testFlux.momentumY, + parameters.names.at(i) + ", MomentumStarFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].momentumZ, testFlux.momentumZ, + parameters.names.at(i) + ", MomentumStarFluxZ"); + testing_utilities::Check_Results(fiducialFlux[i].energy, testFlux.energy, + parameters.names.at(i) + ", EnergyStarFlux"); + testing_utilities::Check_Results(fiducialFlux[i].magneticY, testFlux.magneticY, + parameters.names.at(i) + ", MagneticStarFluxY"); + testing_utilities::Check_Results(fiducialFlux[i].magneticZ, testFlux.magneticZ, + parameters.names.at(i) + ", MagneticStarFluxZ"); + } +} +// ========================================================================= + +// 
========================================================================= +/*! + * \brief Test the mhd::internal::_returnFluxes function + * + */ +TEST(tMHDHlldInternalReturnFluxes, CorrectInputExpectCorrectOutput) +{ + double const dummyValue = 999; + mhd::internal::Flux inputFlux{1, 2, 3, 4, 5, 6, 7}; + mhd::internal::State inputState{8, 9, 10, 11, 12, 13, 14, 15, 16}; + + int threadId = 0; + int n_cells = 10; + int nFields = 8; // Total number of conserved fields + #ifdef SCALAR + nFields += NSCALARS; + #endif // SCALAR + #ifdef DE + nFields++; + #endif // DE + + // Lambda for finding indices and check if they're correct + auto findIndex = [](std::vector const &vec, double const &num, int const &fidIndex, std::string const &name) { + int index = std::distance(vec.begin(), std::find(vec.begin(), vec.end(), num)); + EXPECT_EQ(fidIndex, index) << "Error in " << name << " index" << std::endl; + + return index; + }; + + for (size_t direction = 0; direction < 1; direction++) { + int o1, o2, o3; + switch (direction) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; + } + + std::vector testFluxArray(nFields * n_cells, dummyValue); + + // Fiducial Indices + int const fiducialDensityIndex = threadId + n_cells * grid_enum::density; + int const fiducialMomentumIndexX = threadId + n_cells * o1; + int const fiducialMomentumIndexY = threadId + n_cells * o2; + int const fiducialMomentumIndexZ = threadId + n_cells * o3; + int const fiducialEnergyIndex = threadId + n_cells * grid_enum::Energy; + int const fiducialMagneticYIndex = threadId + n_cells * (grid_enum::magnetic_x); + int const fiducialMagneticZIndex = threadId + n_cells * (grid_enum::magnetic_y); + + mhd::internal::returnFluxes(threadId, o1, o2, o3, n_cells, testFluxArray.data(), inputFlux, inputState); + + // Find the indices for the various fields + int densityLoc = findIndex(testFluxArray, inputFlux.density, 
fiducialDensityIndex, "density"); + int momentumXLocX = findIndex(testFluxArray, inputFlux.momentumX, fiducialMomentumIndexX, "momentum X"); + int momentumYLocY = findIndex(testFluxArray, inputFlux.momentumY, fiducialMomentumIndexY, "momentum Y"); + int momentumZLocZ = findIndex(testFluxArray, inputFlux.momentumZ, fiducialMomentumIndexZ, "momentum Z"); + int energyLoc = findIndex(testFluxArray, inputFlux.energy, fiducialEnergyIndex, "energy"); + int magneticYLoc = findIndex(testFluxArray, inputFlux.magneticY, fiducialMagneticYIndex, "magnetic Y"); + int magneticZLoc = findIndex(testFluxArray, inputFlux.magneticZ, fiducialMagneticZIndex, "magnetic Z"); + + for (size_t i = 0; i < testFluxArray.size(); i++) { + // Skip the already checked indices + if ((i != densityLoc) and (i != momentumXLocX) and (i != momentumYLocY) and (i != momentumZLocZ) and + (i != energyLoc) and (i != magneticYLoc) and (i != magneticZLoc)) { + EXPECT_EQ(dummyValue, testFluxArray.at(i)) << "Unexpected value at index that _returnFluxes shouldn't be " + "touching" + << std::endl + << "Index = " << i << std::endl + << "Direction = " << direction << std::endl; + } + } + } +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the mhd::internal::starTotalPressure function + * + */ +TEST(tMHDHlldInternalStarTotalPressure, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + + std::vector const fiducialPressure{6802.2800807224075, 3476.1984612875144}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real const testPressure = mhd::internal::starTotalPressure(parameters.stateLVec.at(i), parameters.stateRVec.at(i), + parameters.speed.at(i)); + + // Now check results + testing_utilities::Check_Results(fiducialPressure.at(i), testPressure, + parameters.names.at(i) + ", total pressure in the star states"); + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the mhd::internal::loadState function + * + */ +TEST(tMHDHlldInternalLoadState, CorrectInputExpectCorrectOutput) +{ + TestParams const parameters; + int const threadId = 0; + int const n_cells = 10; + std::vector interfaceArray(n_cells * grid_enum::num_fields); + std::iota(std::begin(interfaceArray), std::end(interfaceArray), 1.); + + std::vector const fiducialState{ + {1, 11, 21, 31, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, + {1, 21, 31, 11, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, + {1, 31, 11, 21, 41, 51, 61, 9.9999999999999995e-21, 7462.3749918998346}, + }; + + for (size_t direction = 0; direction < 3; direction++) { + int o1, o2, o3; + switch (direction) { + case 0: + o1 = 1; + o2 = 2; + o3 = 3; + break; + case 1: + o1 = 2; + o2 = 3; + o3 = 1; + break; + case 2: + o1 = 3; + o2 = 1; + o3 = 2; + break; + } + + mhd::internal::State const testState = mhd::internal::loadState(interfaceArray.data(), parameters.magneticX.at(0), + parameters.gamma, threadId, n_cells, o1, o2, o3); + + // Now check results + testing_utilities::Check_Results(fiducialState.at(direction).density, testState.density, ", Density"); + 
testing_utilities::Check_Results(fiducialState.at(direction).velocityX, testState.velocityX, ", velocityX"); + testing_utilities::Check_Results(fiducialState.at(direction).velocityY, testState.velocityY, ", velocityY"); + testing_utilities::Check_Results(fiducialState.at(direction).velocityZ, testState.velocityZ, ", velocityZ"); + testing_utilities::Check_Results(fiducialState.at(direction).energy, testState.energy, ", energy"); + testing_utilities::Check_Results(fiducialState.at(direction).magneticY, testState.magneticY, ", magneticY"); + testing_utilities::Check_Results(fiducialState.at(direction).magneticZ, testState.magneticZ, ", magneticZ"); + testing_utilities::Check_Results(fiducialState.at(direction).gasPressure, testState.gasPressure, ", gasPressure"); + testing_utilities::Check_Results(fiducialState.at(direction).totalPressure, testState.totalPressure, + ", totalPressure"); + } +} +// ========================================================================= +#endif // MHD diff --git a/src/riemann_solvers/roe_cuda.cu b/src/riemann_solvers/roe_cuda.cu index 88b094468..1735fe24d 100644 --- a/src/riemann_solvers/roe_cuda.cu +++ b/src/riemann_solvers/roe_cuda.cu @@ -1,36 +1,38 @@ /*! \file roe_cuda.cu * \brief Function definitions for the cuda Roe Riemann solver.*/ -#ifdef CUDA - -#include "../utils/gpu.hpp" #include + #include "../global/global.h" #include "../global/global_cuda.h" #include "../riemann_solvers/roe_cuda.h" +#include "../utils/gpu.hpp" -#ifdef DE //PRESSURE_DE -#include "../utils/hydro_utilities.h" +#ifdef DE // PRESSURE_DE + #include "../utils/hydro_utilities.h" #endif -/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields) - * \brief Roe Riemann solver based on the version described in Stone et al, 2008. 
*/ -__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields) +/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, + * int dir, int n_fields) \brief Roe Riemann solver based on the version + * described in Stone et al, 2008. */ +__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields) { // get a thread index - int blockId = blockIdx.x + blockIdx.y*gridDim.x; - int tid = threadIdx.x + blockId * blockDim.x; - int zid = tid / (nx*ny); - int yid = (tid - zid*nx*ny) / nx; - int xid = tid - zid*nx*ny - yid*nx; + int blockId = blockIdx.x + blockIdx.y * gridDim.x; + int tid = threadIdx.x + blockId * blockDim.x; + int zid = tid / (nx * ny); + int yid = (tid - zid * nx * ny) / nx; + int xid = tid - zid * nx * ny - yid * nx; - int n_cells = nx*ny*nz; + int n_cells = nx * ny * nz; Real dl, vxl, mxl, vyl, myl, vzl, mzl, pl, El; Real dr, vxr, mxr, vyr, myr, vzr, mzr, pr, Er; Real etah = 0.0; - Real g1 = gamma - 1.0; + Real g1 = gamma - 1.0; Real Hl, Hr; Real sqrtdl, sqrtdr, vx, vy, vz, H; Real vsq, asq, a; @@ -44,93 +46,99 @@ __global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R sum_0 = sum_1 = sum_2 = sum_3 = sum_4 = 0.0; Real test0, test1, test2, test3, test4; int hlle_flag = 0; - #ifdef DE +#ifdef DE Real dgel, gel, dger, ger, f_ge_l, f_ge_r, E_kin; - #endif - #ifdef SCALAR - Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS], f_scalar_r[NSCALARS]; - #endif +#endif +#ifdef SCALAR + Real dscalarl[NSCALARS], scalarl[NSCALARS], dscalarr[NSCALARS], scalarr[NSCALARS], f_scalar_l[NSCALARS], + f_scalar_r[NSCALARS]; +#endif int o1, o2, o3; - if (dir==0) { - o1 = 1; o2 = 2; o3 = 
3; + if (dir == 0) { + o1 = 1; + o2 = 2; + o3 = 3; } - if (dir==1) { - o1 = 2; o2 = 3; o3 = 1; + if (dir == 1) { + o1 = 2; + o2 = 3; + o3 = 1; } - if (dir==2) { - o1 = 3; o2 = 1; o3 = 2; + if (dir == 2) { + o1 = 3; + o2 = 1; + o3 = 2; } // Each thread executes the solver independently - if (xid < nx && yid < ny && zid < nz) - { + if (xid < nx && yid < ny && zid < nz) { // retrieve conserved variables - dl = dev_bounds_L[ tid]; - mxl = dev_bounds_L[o1*n_cells + tid]; - myl = dev_bounds_L[o2*n_cells + tid]; - mzl = dev_bounds_L[o3*n_cells + tid]; - El = dev_bounds_L[4*n_cells + tid]; - #ifdef SCALAR - for (int i=0; i= 0.0) { - dev_flux[ tid] = f_d_l; - dev_flux[o1*n_cells+tid] = f_mx_l; - dev_flux[o2*n_cells+tid] = f_my_l; - dev_flux[o3*n_cells+tid] = f_mz_l; - dev_flux[4*n_cells+tid] = f_E_l; - #ifdef SCALAR - for (int i=0; i lambda_m) { + if (lambda_0 > lambda_m) { if (test0 <= 0.0) { - hlle_flag=1; + hlle_flag = 1; } - if (test4 - 0.5*(test1*test1 + test2*test2 + test3*test3)/test0 < 0.0) { - hlle_flag=2; + if (test4 - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < 0.0) { + hlle_flag = 2; } } test0 += a3 + a4; - test1 += a3*vx; - test2 += a1 + a3*vy; - test3 += a2 + a3*vz; - test4 += a1*vy + a2*vz + a3*0.5*vsq; + test1 += a3 * vx; + test2 += a1 + a3 * vy; + test3 += a2 + a3 * vz; + test4 += a1 * vy + a2 * vz + a3 * 0.5 * vsq; - if(lambda_p > lambda_0) { + if (lambda_p > lambda_0) { if (test0 <= 0.0) { - hlle_flag=1; + hlle_flag = 1; } - if (test4 - 0.5*(test1*test1 + test2*test2 + test3*test3)/test0 < 0.0) { - hlle_flag=2; + if (test4 - 0.5 * (test1 * test1 + test2 * test2 + test3 * test3) / test0 < 0.0) { + hlle_flag = 2; } } - // if pressure or density is negative, and we have not already returned the supersonic fluxes, - // return the HLLE fluxes + // if pressure or density is negative, and we have not already returned + // the supersonic fluxes, return the HLLE fluxes if (hlle_flag != 0) { - - Real cfl, cfr, al, ar, bm, bp, tmp; + Real cfl, 
cfr, bm, bp, tmp; // compute max and fmin wave speeds - cfl = sqrt(gamma*pl/dl); // sound speed in left state - cfr = sqrt(gamma*pr/dr); // sound speed in right state + cfl = sqrt(gamma * pl / dl); // sound speed in left state + cfr = sqrt(gamma * pr / dr); // sound speed in right state // take max/fmin of Roe eigenvalues and left and right sound speeds - al = fmin(lambda_m, vxl - cfl); - ar = fmax(lambda_p, vxr + cfr); - - bm = fmin(al, (Real) 0.0); - bp = fmax(ar, (Real) 0.0); + bm = fmin(fmin(lambda_m, vxl - cfl), (Real)0.0); + bp = fmax(fmax(lambda_p, vxr + cfr), (Real)0.0); // compute left and right fluxes - f_d_l = mxl - bm*dl; - f_d_r = mxr - bp*dr; + f_d_l = mxl - bm * dl; + f_d_r = mxr - bp * dr; - f_mx_l = mxl*(vxl - bm) + pl; - f_mx_r = mxr*(vxr - bp) + pr; + f_mx_l = mxl * (vxl - bm) + pl; + f_mx_r = mxr * (vxr - bp) + pr; - f_my_l = myl*(vxl - bm); - f_my_r = myr*(vxr - bp); + f_my_l = myl * (vxl - bm); + f_my_r = myr * (vxr - bp); - f_mz_l = mzl*(vxl - bm); - f_mz_r = mzr*(vxr - bp); + f_mz_l = mzl * (vxl - bm); + f_mz_r = mzr * (vxr - bp); - f_E_l = El*(vxl - bm) + pl*vxl; - f_E_r = Er*(vxr - bp) + pr*vxr; + f_E_l = El * (vxl - bm) + pl * vxl; + f_E_r = Er * (vxr - bp) + pr * vxr; - #ifdef DE - f_ge_l = dgel*(vxl - bm); - f_ge_r = dger*(vxr - bp); - #endif +#ifdef DE + f_ge_l = dgel * (vxl - bm); + f_ge_r = dger * (vxr - bp); +#endif - #ifdef SCALAR - for (int i=0; i= 0.0) - dev_flux[(5+i)*n_cells+tid] = dev_flux[tid] * scalarl[i]; - else - dev_flux[(5+i)*n_cells+tid] = dev_flux[tid] * scalarr[i]; + dev_flux[tid] = 0.5 * (f_d_l + f_d_r - sum_0); + dev_flux[o1 * n_cells + tid] = 0.5 * (f_mx_l + f_mx_r - sum_1); + dev_flux[o2 * n_cells + tid] = 0.5 * (f_my_l + f_my_r - sum_2); + dev_flux[o3 * n_cells + tid] = 0.5 * (f_mz_l + f_mz_r - sum_3); + dev_flux[4 * n_cells + tid] = 0.5 * (f_E_l + f_E_r - sum_4); +#ifdef SCALAR + for (int i = 0; i < NSCALARS; i++) { + if (dev_flux[tid] >= 0.0) { + dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarl[i]; + 
} else { + dev_flux[(5 + i) * n_cells + tid] = dev_flux[tid] * scalarr[i]; + } } - #endif - #ifdef DE - if (dev_flux[tid] >= 0.0) - dev_flux[(n_fields-1)*n_cells+tid] = dev_flux[tid] * gel; - else - dev_flux[(n_fields-1)*n_cells+tid] = dev_flux[tid] * ger; - #endif +#endif +#ifdef DE + if (dev_flux[tid] >= 0.0) { + dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * gel; + } else { + dev_flux[(n_fields - 1) * n_cells + tid] = dev_flux[tid] * ger; + } +#endif } - } - } - } - - -#endif //CUDA diff --git a/src/riemann_solvers/roe_cuda.h b/src/riemann_solvers/roe_cuda.h index 3e7fcc772..bff592876 100644 --- a/src/riemann_solvers/roe_cuda.h +++ b/src/riemann_solvers/roe_cuda.h @@ -1,19 +1,16 @@ /*! \file roe_cuda.h * \brief Declarations of functions for the cuda roe riemann solver kernel. */ -#ifdef CUDA - #ifndef ROE_CUDA_H -#define Roe_CUDA_H +#define ROE_CUDA_H #include "../global/global.h" +/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real + * *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, + * int dir, int n_fields) \brief Roe Riemann solver based on the version + * described in Stone et al, 2008. */ +__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, + int nz, int n_ghost, Real gamma, int dir, int n_fields); -/*! \fn Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, Real *dev_etah, int dir, int n_fields) - * \brief Roe Riemann solver based on the version described in Stone et al, 2008. 
*/ -__global__ void Calculate_Roe_Fluxes_CUDA(Real *dev_bounds_L, Real *dev_bounds_R, Real *dev_flux, int nx, int ny, int nz, int n_ghost, Real gamma, int dir, int n_fields); - - - -#endif //ROE_CUDA_H -#endif //CUDA +#endif // ROE_CUDA_H diff --git a/src/system_tests/cooling_system_tests.cpp b/src/system_tests/cooling_system_tests.cpp index 8b62ef092..71095151c 100644 --- a/src/system_tests/cooling_system_tests.cpp +++ b/src/system_tests/cooling_system_tests.cpp @@ -4,25 +4,18 @@ * */ - // External Libraries and Headers #include -#include // provides std:sin + +#include // provides std:sin // Local includes #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" - - -#ifndef PI -#define PI 3.141592653589793 -#endif - #define COOL_RHO 6.9498489284711 -TEST(tCOOLINGSYSTEMConstant5, - CorrectInputExpectCorrectOutput) +TEST(tCOOLINGSYSTEMConstant5, CorrectInputExpectCorrectOutput) { // dt = 0.3 // rho = COOL_RHO*1e5 @@ -31,61 +24,53 @@ TEST(tCOOLINGSYSTEMConstant5, /* double energy = 0.0014850544057189395;// Python */ - double energy = 0.00148501098087863;// Cholla - systemTest::SystemTestRunner testObject(false, false, false); + double energy = 0.00148501098087863; // Cholla + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",energy); - + testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + 
testing_utilities::analyticConstant(testObject, "Energy", energy); } - -TEST(tCOOLINGSYSTEMConstant7, - CorrectInputExpectCorrectOutput) +TEST(tCOOLINGSYSTEMConstant7, CorrectInputExpectCorrectOutput) { // dt = 100 // rho = COOL_RHO*1e5 // pressure = 1e-1 // T = 1e7 // double energy = 0.14982743570299709; // Python - double energy = 0.14982745510047499; // Cholla - systemTest::SystemTestRunner testObject(false, false, false); + double energy = 0.14982745510047499; // Cholla + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",energy); - + testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", energy); } -TEST(tCOOLINGSYSTEMConstant8, - CorrectInputExpectCorrectOutput) +TEST(tCOOLINGSYSTEMConstant8, CorrectInputExpectCorrectOutput) { // dt = 90 // rho = COOL_RHO*1e5 // pressure = 1 // T = 1e8 - + // double energy = 1.499669522009355; // Python - double energy = 1.4996695198095711; // Cholla - systemTest::SystemTestRunner testObject(false, false, false); + double energy = 1.4996695198095711; // Cholla + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",COOL_RHO*1e5); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - 
testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",energy); - - + testing_utilities::analyticConstant(testObject, "density", COOL_RHO * 1e5); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", energy); } - diff --git a/src/system_tests/gravity_system_tests.cpp b/src/system_tests/gravity_system_tests.cpp index 76cae4d7d..c2a59c40e 100644 --- a/src/system_tests/gravity_system_tests.cpp +++ b/src/system_tests/gravity_system_tests.cpp @@ -20,11 +20,10 @@ * */ /// @{ -TEST(tGRAVITYSYSTEMSphericalCollapse, - CorrectInputExpectCorrectOutput) +TEST(tGRAVITYSYSTEMSphericalCollapse, CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner collapseTest; - collapseTest.runTest(); + system_test::SystemTestRunner collapseTest; + collapseTest.runTest(); } /// @} // ============================================================================= diff --git a/src/system_tests/hydro_system_tests.cpp b/src/system_tests/hydro_system_tests.cpp index 72a6dc349..6cffe9c21 100644 --- a/src/system_tests/hydro_system_tests.cpp +++ b/src/system_tests/hydro_system_tests.cpp @@ -5,93 +5,306 @@ * */ - // External Libraries and Headers #include -#include // provides std:sin + +#include // provides std:sin // Local includes +#include "../io/io.h" #include "../system_tests/system_tester.h" #include "../utils/testing_utilities.h" - - -#ifndef PI -#define PI 3.141592653589793 -#endif - - // ============================================================================= -// Test Suite: tHYDROSYSTEMSodShockTube +// Test Suite: tHYDROtMHDSYSTEMSodShockTube // ============================================================================= /*! 
- * \defgroup tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput + * \defgroup + * tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput * \brief Test the Sod Shock tube initial conditions as a parameterized test * with varying numbers of MPI ranks * */ /// @{ -class tHYDROSYSTEMSodShockTubeParameterizedMpi - :public - ::testing::TestWithParam +// NOLINTNEXTLINE(readability-identifier-naming) +class tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi : public ::testing::TestWithParam { -protected: - systemTest::SystemTestRunner sodTest; + protected: + system_test::SystemTestRunner sodTest; }; -TEST_P(tHYDROSYSTEMSodShockTubeParameterizedMpi, - CorrectInputExpectCorrectOutput) +TEST_P(tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, CorrectInputExpectCorrectOutput) { - sodTest.numMpiRanks = GetParam(); - sodTest.runTest(); +#ifdef MHD + sodTest.setFixedEpsilon(1.0E-4); + + // Don't test the gas energy fields + auto datasetNames = sodTest.getDataSetsToTest(); + datasetNames.erase(std::remove(datasetNames.begin(), datasetNames.end(), "GasEnergy"), datasetNames.end()); + + // Set the magnetic fiducial datasets to zero + size_t const size = 64 * 64 * 65; + std::vector const magVec(size, 0); + + for (const auto *field : {"magnetic_x", "magnetic_y", "magnetic_z"}) { + sodTest.setFiducialData(field, magVec); + datasetNames.emplace_back(field); + } + + sodTest.setDataSetsToTest(datasetNames); + + double const maxAllowedL1Error = 7.0E-3; + double const maxAllowedError = 4.6E-2; +#else + double const maxAllowedL1Error = 9.4E-5; + double const maxAllowedError = 6.4E-4; +#endif // MHD + + sodTest.numMpiRanks = GetParam(); + sodTest.runTest(true, maxAllowedL1Error, maxAllowedError); } -INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, - tHYDROSYSTEMSodShockTubeParameterizedMpi, +INSTANTIATE_TEST_SUITE_P(CorrectInputExpectCorrectOutput, tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi, ::testing::Values(1, 2, 4)); /// @} // 
============================================================================= -TEST(tHYDROSYSTEMConstant, - CorrectInputExpectCorrectOutput) +TEST(tHYDROSYSTEMSodShockTube, OneDimensionalCorrectInputExpectCorrectOutput) +{ + system_test::SystemTestRunner sod_test; + sod_test.runTest(); +} + +TEST(tHYDROSYSTEMSodShockTube, TwoDimensionalCorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner sod_test; + sod_test.runTest(); +} + +TEST(tHYDROtMHDSYSTEMConstant, CorrectInputExpectCorrectOutput) +{ + system_test::SystemTestRunner testObject(false, false, false); testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticConstant(testObject,"density",1.0); - testingUtilities::analyticConstant(testObject,"momentum_x",0.0); - testingUtilities::analyticConstant(testObject,"momentum_y",0.0); - testingUtilities::analyticConstant(testObject,"momentum_z",0.0); - testingUtilities::analyticConstant(testObject,"Energy",1.5e-5); - + testing_utilities::analyticConstant(testObject, "density", 1.0); + testing_utilities::analyticConstant(testObject, "momentum_x", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_y", 0.0); + testing_utilities::analyticConstant(testObject, "momentum_z", 0.0); + testing_utilities::analyticConstant(testObject, "Energy", 1.5e-5); } - -TEST(tHYDROSYSTEMSoundWave3D, - CorrectInputExpectCorrectOutput) +TEST(tHYDROtMHDSYSTEMSoundWave3D, CorrectInputExpectCorrectOutput) { - double time = 0.05; + double time = 0.05; double amplitude = 1e-5; - double dx = 1./64.; - - double real_kx = 2*PI;//kx of the physical problem - - double kx = real_kx * dx; - double speed = 1;//speed of wave is 1 since P = 0.6 and gamma = 1.666667 - double phase = kx*0.5 - speed * time * real_kx; //kx*0.5 for half-cell offset + double dx = 1. 
/ 64.; + + double real_kx = 2 * M_PI; // kx of the physical problem + + double kx = real_kx * dx; + double speed = 1; // speed of wave is 1 since P = 0.6 and gamma = 1.666667 + double phase = kx * 0.5 - speed * time * real_kx; // kx*0.5 for half-cell offset double tolerance = 1e-7; - systemTest::SystemTestRunner testObject(false, false, false); + system_test::SystemTestRunner testObject(false, false, false); + +#ifdef MHD + // Loosen correctness check to account for MHD only having PCM. This is + // about the error between PCM and PPMP in hydro + // Check Results. Values based on results in Gardiner & Stone 2008 + #ifdef PCM + tolerance = 1e-6; + #elif defined(PLMC) + tolerance = 1.0E-7; + #elif defined(PPMC) + tolerance = 1.9E-9; + #endif // PCM +#endif // MHD testObject.launchCholla(); testObject.openHydroTestData(); - testingUtilities::analyticSine(testObject,"density",1.0,amplitude,kx,0.0,0.0,phase,tolerance); - testingUtilities::analyticSine(testObject,"momentum_x",0.0,amplitude,kx,0.0,0.0,phase,tolerance); - //testingUtilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); - //testingUtilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); + ASSERT_NO_FATAL_FAILURE( + testing_utilities::analyticSine(testObject, "density", 1.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); + ASSERT_NO_FATAL_FAILURE( + testing_utilities::analyticSine(testObject, "momentum_x", 0.0, amplitude, kx, 0.0, 0.0, phase, tolerance)); + // testing_utilities::analyticSine(testObject,"momentum_y",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); + // testing_utilities::analyticSine(testObject,"momentum_z",0.0,amplitude,kx,0.0,0.0,0.0,tolerance); +} + +// ============================================================================= +// Test Suite: tHYDROtMHDSYSTEMLinearWavesParameterizedMpi +// ============================================================================= +/*! 
+ * \defgroup tHYDROtMHDSYSTEMLinearWavesParameterizedMpi + * \brief Test the linear waves initial conditions as a parameterized test + * with varying numbers of MPI ranks. + * + */ +/// @{ +// NOLINTNEXTLINE(readability-identifier-naming) +class tHYDROtMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam +{ + public: + tHYDROtMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){}; + + protected: + system_test::SystemTestRunner waveTest; + +#ifdef PCM + double static constexpr allowedL1Error = 4E-7; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 4E-7; +#elif defined(PLMC) + double static constexpr allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 1E-7; +#elif defined(PLMP) + double static constexpr allowedL1Error = 1E-7; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 1E-7; +#elif defined(PPMC) + double static constexpr allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 2.7E-8; +#elif defined(PPMP) + double static constexpr allowedL1Error = 2.7E-8; // Based on results in Gardiner & Stone 2008 + double static constexpr allowedError = 2.7E-8; +#endif + + void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &vx = 0.0) + { + // Constant for all tests + size_t const N = 32; + double const domain = 0.5; + double const gamma = 5. 
/ 3.; + double const tOut = 2 * domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2 * N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2 * domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=0"); + waveTest.chollaLaunchParams.append(" By=0"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + 
to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_By=0"); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=0"); + } +}; + +// Sound Waves Moving Left and Right +// ================================= +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError); +} + +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, SoundWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.; + int const numTimeSteps = 214; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = -1; + double const rEigenVec_MomentumY = 1; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_E = 1.5; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results 
+ waveTest.runL1ErrorTest(2 * allowedL1Error, allowedError); +} + +// Contact Waves Moving Left and Right +// =================================== +TEST_P(tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, HydroContactWaveCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + int const numTimeSteps = 427; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, velocityX); + + // Set the number of MPI ranks + waveTest.numMpiRanks = GetParam(); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results + waveTest.runL1ErrorTest(allowedL1Error, allowedError); } + +INSTANTIATE_TEST_SUITE_P(, tHYDROtMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1)); +/// @} +// ============================================================================= \ No newline at end of file diff --git a/src/system_tests/input_files/blank_settings_file.txt b/src/system_tests/input_files/blank_settings_file.txt new file mode 100644 index 000000000..e8fbd7e77 --- /dev/null +++ b/src/system_tests/input_files/blank_settings_file.txt @@ -0,0 +1,3 @@ +# This is blank file for system tests that are setting all the parameters +# internally to point at. 
Without a blank file cholla will crash + diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..dd54ff082 --- /dev/null +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_OneDimensionalCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 1D Sod Shock tube +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=1 +# number of grid cells in the z dimension +nz=1 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# density of right state +rho_r=0.1 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.1 +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..c89e179be --- /dev/null +++ b/src/system_tests/input_files/tHYDROSYSTEMSodShockTube_TwoDimensionalCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,56 @@ +# +# Parameter File for 2D Sod Shock tube +# + +################################################ +# number of grid cells in the x 
dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=1 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# density of right state +rho_r=0.1 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.1 +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 diff --git a/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt new file mode 100644 index 000000000..38686bfbd --- /dev/null +++ b/src/system_tests/input_files/tHYDROtMHDReadGridHdf5_RestartSlowWaveExpectCorrectOutput.txt @@ -0,0 +1,72 @@ +# +# Parameter File for MHD slow magnetosonic wave +# See [this blog post](https://robertcaddy.com/posts/Classes-and-bugfixing-6/) +# for details on each wave +# The right eigenvector for this wave is: +# (1/(6*sqrt(5))) * [12, +/-6, +/-8*sqrt(2), +/-4, 0, -4*sqrt(2), -2, 9] +# The terms with two sign options: use the left one for right moving waves and +# the right one for left moving waves +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Linear_Wave +# domain properties +xmin=0.0 
+ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=0.5 +zlen=0.5 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for linear wave problems +# initial density +rho=1.0 +# velocity in the x direction +vx=0 +# velocity in the y direction +vy=0 +# velocity in the z direction +vz=0 +# initial pressure +P=0.6 +# magnetic field in the x direction +Bx=1 +# magnetic field in the y direction +By=1.5 +# magnetic field in the z direction +Bz=0 +# amplitude of perturbing oscillations +A=1e-6 +# value of gamma +gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=0.8944271909999159 +rEigenVec_MomentumX=0.4472135954999579 +rEigenVec_MomentumY=0.8944271909999159 +rEigenVec_MomentumZ=0.0 +rEigenVec_Bx=0.0 +rEigenVec_By=-0.4472135954999579 +rEigenVec_Bz=0.0 +rEigenVec_E=0.6708203932499369 + diff --git a/src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt similarity index 100% rename from src/system_tests/input_files/tHYDROSYSTEMConstant_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMConstant_CorrectInputExpectCorrectOutput.txt diff --git a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt similarity index 95% rename from src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt index 6fb66732b..efcd912fd 100644 --- 
a/src/system_tests/input_files/tHYDROSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSodShockTubeParameterizedMpi_CorrectInputExpectCorrectOutput.txt @@ -25,10 +25,10 @@ zlen=1.0 # type of boundary conditions xl_bcnd=3 xu_bcnd=3 -yl_bcnd=0 -yu_bcnd=0 -zl_bcnd=0 -zu_bcnd=0 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 # path to output directory outdir=./ @@ -54,4 +54,3 @@ P_r=0.1 diaph=0.5 # value of gamma gamma=1.4 - diff --git a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt similarity index 70% rename from src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt rename to src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt index f1c23ea6e..efdedaceb 100644 --- a/src/system_tests/input_files/tHYDROSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt +++ b/src/system_tests/input_files/tHYDROtMHDSYSTEMSoundWave3D_CorrectInputExpectCorrectOutput.txt @@ -14,7 +14,7 @@ tout=0.05 # time interval for output outstep=0.05 # name of initial conditions -init=Sound_Wave +init=Linear_Wave # domain properties xmin=0.0 ymin=0.0 @@ -34,18 +34,31 @@ outdir=./ ################################################# # Parameters for linear wave problems -# initial density +# initial density rho=1.0 -# velocity in the x direction +# velocity in the x direction vx=0 # velocity in the y direction vy=0 # velocity in the z direction vz=0 -# initial pressure +# initial pressure P=0.6 # amplitude of perturbing oscillations A=1e-5 # value of gamma gamma=1.666666666666667 +# The right eigenvectors to set the wave properly +rEigenVec_rho=1 +rEigenVec_MomentumX=1 +rEigenVec_MomentumY=1 +rEigenVec_MomentumZ=1 +rEigenVec_E=1.5 +# Set the magnetic field quantities to zero +Bx=0 +By=0 +Bz=0 +rEigenVec_Bx=0 +rEigenVec_By=0 
+rEigenVec_Bz=0 \ No newline at end of file diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..d6a733d3c --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_AdvectingFieldLoopCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,55 @@ +# +# Parameter File for an MHD Advecting Field Loop as defined in +# [Gardiner & Stone 2008](https://ui.adsabs.harvard.edu/abs/2008JCoPh.227.4123G/abstract) +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=2.0 +# time interval for output +outstep=2.0 +# name of initial conditions +init=Advecting_Field_Loop +# domain properties +xmin=-0.5 +ymin=-0.5 +zmin=-1.0 +xlen=1.0 +ylen=1.0 +zlen=2.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for the advecting field loop problem +# initial density +rho=1.0 +# velocity in the x direction +vx=1.0 +# velocity in the y direction +vy=1.0 +# velocity in the z direction +vz=2.0 +# initial pressure +P=1.0 +# amplitude of the loop/magnetic field background value +A=0.001 +# Radius of the Loop +radius=0.3 + +# value of gamma +gamma=1.666666666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..514dd3359 --- /dev/null +++
b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_BrioAndWuShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,72 @@ +# +# Parameter File for 3D Brio & Wu MHD shock tube +# Citation: Brio & Wu 1988 "An Upwind Differencing Scheme for the Equations of +# Ideal Magnetohydrodynamics" +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.1 +# time interval for output +outstep=0.1 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0 +vy_l=0 +vz_l=0 +# pressure of left state +P_l=1.0 +# Magnetic field of the left state +Bx_l=0.75 +By_l=1.0 +Bz_l=0.0 + +# density of right state +rho_r=0.128 +# velocity of right state +vx_r=0 +vy_r=0 +vz_r=0 +# pressure of right state +P_r=0.1 +# Magnetic field of the right state +Bx_r=0.75 +By_r=-1.0 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=2.0 + diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..eabea0e60 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithMagneticFieldCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,50 @@ +# +# Parameter File for 3D box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=16 
+# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# final output time +tout=100000.0 +# time interval for output +outstep=100000.0 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=1e4 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1.380658e-5 +# Magnetic Field +Bx=1.0e-5 +By=2.0e-5 +Bz=3.0e-5 +# value of gamma +gamma=1.666666667 + diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..4f52b7cd6 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,50 @@ +# +# Parameter File for 3D box filled with gas +# + +################################################ +# number of grid cells in the x dimension +nx=16 +# number of grid cells in the y dimension +ny=16 +# number of grid cells in the z dimension +nz=16 +# final output time +tout=100000.0 +# time interval for output +outstep=100000.0 +# name of initial conditions +init=Constant +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# density +rho=1e4 +# velocity +vx=0 +vy=0 +vz=0 +# pressure +P=1.380658e-5 +# Magnetic Field +Bx=0.0 +By=0.0 +Bz=0.0 +# value of gamma +gamma=1.666666667 + diff --git 
a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..538984951 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,73 @@ +# +# Parameter File for 3D Dai & Woodward MHD shock tube +# Citation: Dai & Woodward 1998 "On The Divergence-Free Condition and +# Conservation Laws in Numerical Simulations for Supersonic Magnetohydrodynamic +# Flows" +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.08 +# velocity of left state +vx_l=1.2 +vy_l=0.01 +vz_l=0.5 +# pressure of left state +P_l=0.95 +# Magnetic field of the left state +Bx_l=0.5641895835477563 +By_l=1.0155412503859613 +Bz_l=0.5641895835477563 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=0.5641895835477563 +By_r=1.1283791670955126 +Bz_r=0.5641895835477563 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 + diff --git
a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..3e4747551 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,71 @@ +# +# Parameter File for 3D Einfeldt Strong Rarefaction MHD test +# Citation: Einfeldt et al. 1991 "On Godunov-Type Methods near Low Densities" +# + +################################################ +# number of grid cells in the x dimension +nx=32 +# number of grid cells in the y dimension +ny=32 +# number of grid cells in the z dimension +nz=32 +# final output time +tout=0.16 +# time interval for output +outstep=0.16 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=-2.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=0.45 +# Magnetic field of the left state +Bx_l=0.0 +By_l=0.5 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=2.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=0.45 +# Magnetic field of the right state +Bx_r=0.0 +By_r=0.5 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.4 + diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..77ef94b72 --- /dev/null +++ 
b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_MhdBlastWaveCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,61 @@ +# +# Parameter File for the MHD Blast wave +# See [Stone & Gardiner 2009](https://ui.adsabs.harvard.edu/abs/2009NewA...14..139S/abstract) for details. +# + +################################################ +# number of grid cells in the x dimension +nx=50 +# number of grid cells in the y dimension +ny=100 +# number of grid cells in the z dimension +nz=50 +# final output time +tout=0.2 +# time interval for output +outstep=0.2 +# name of initial conditions +init=MHD_Spherical_Blast +# domain properties +xmin=-0.5 +ymin=-0.75 +zmin=-0.5 +xlen=1.0 +ylen=1.5 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for MHD Blast Wave problem + +# initial density +rho=1.0 +# velocity in the x direction +vx=0.0 +# velocity in the y direction +vy=0.0 +# velocity in the z direction +vz=0.0 +# initial pressure outside the blast zone +P=0.1 +# initial pressure inside the blast zone. Note that the paper says this should be 100, that is a typo +P_blast=10.0 +# The radius of the blast zone +radius=0.1 +# magnetic field in the x direction. Equal to 1/sqrt(2) +Bx=0.70710678118654746 +# magnetic field in the y direction.
Equal to 1/sqrt(2) +By=0.70710678118654746 +# magnetic field in the z direction +Bz=0.0 + +# value of gamma +gamma=1.666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..332e26eb2 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_OrszagTangVortexCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,42 @@ +# +# Parameter File for the Orszag-Tang Vortex +# See [Gardiner & Stone 2008](https://arxiv.org/abs/0712.2634) +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.5 +# time interval for output +outstep=0.5 +# name of initial conditions +init=Orszag_Tang_Vortex +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 +# type of boundary conditions +xl_bcnd=1 +xu_bcnd=1 +yl_bcnd=1 +yu_bcnd=1 +zl_bcnd=1 +zu_bcnd=1 +# path to output directory +outdir=./ + +################################################# +# Parameters for Orszag-Tang Vortex. This problem is defined for a specific set +# of initial conditions which have been hard coded into the initial conditions +# function. 
The only thing that needs to be set here is the adiabatic index + +# value of gamma +gamma=1.666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..a03aef938 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 1a. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 1a +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.08 +# time interval for output +outstep=0.08 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=10.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=20.0 +# Magnetic field of the left state +Bx_l=1.4104739588693909 +By_l=1.4104739588693909 +Bz_l=0.0 + +# density of right state +rho_r=1.0 +# velocity of right state +vx_r=-10.0 +vy_r=0.0 +vz_r=0.0 +# pressure of right state +P_r=1.0 +# Magnetic field of the right state +Bx_r=1.4104739588693909 +By_r=1.4104739588693909 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of
gamma +gamma=1.6666666666666667 diff --git a/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt new file mode 100644 index 000000000..6596c2b01 --- /dev/null +++ b/src/system_tests/input_files/tMHDSYSTEMParameterizedMpi_RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput.txt @@ -0,0 +1,74 @@ +# +# Parameter File for 3D Ryu & Jones MHD shock tube 4d. +# Citation: Ryu & Jones 1995 "Numerical Magnetohydrodynamics in Astrophysics: +# Algorithms and Tests for One-Dimensional Flow" +# +# Note: There are many shock tubes in this paper. This settings file is +# specifically for shock tube 4d +# + +################################################ +# number of grid cells in the x dimension +nx=64 +# number of grid cells in the y dimension +ny=64 +# number of grid cells in the z dimension +nz=64 +# final output time +tout=0.16 +# time interval for output +outstep=0.16 +# name of initial conditions +init=Riemann + +# domain properties +xmin=0.0 +ymin=0.0 +zmin=0.0 +xlen=1.0 +ylen=1.0 +zlen=1.0 + +# type of boundary conditions +xl_bcnd=3 +xu_bcnd=3 +yl_bcnd=3 +yu_bcnd=3 +zl_bcnd=3 +zu_bcnd=3 + +# path to output directory +outdir=./ + +################################################# +# Parameters for 1D Riemann problems +# density of left state +rho_l=1.0 +# velocity of left state +vx_l=0.0 +vy_l=0.0 +vz_l=0.0 +# pressure of left state +P_l=1.0 +# Magnetic field of the left state +Bx_l=0.7 +By_l=0.0 +Bz_l=0.0 + +# density of right state +rho_r=0.3 +# velocity of right state +vx_r=0.0 +vy_r=0.0 +vz_r=1.0 +# pressure of right state +P_r=0.2 +# Magnetic field of the right state +Bx_r=0.7 +By_r=1.0 +Bz_r=0.0 + +# location of initial discontinuity +diaph=0.5 +# value of gamma +gamma=1.6666666666666667 diff --git a/src/system_tests/mhd_system_tests.cpp b/src/system_tests/mhd_system_tests.cpp new file mode 
100644 index 000000000..a14caa9a1 --- /dev/null +++ b/src/system_tests/mhd_system_tests.cpp @@ -0,0 +1,934 @@ +/*! + * \file mhd_system_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains all the system tests for the MHD build type + * + */ + +// STL includes +#include + +// External Libraries and Headers +#include + +// Local includes +#include "../io/io.h" +#include "../system_tests/system_tester.h" +#include "../utils/testing_utilities.h" + +// ============================================================================= +// Test Suite: tMHDSYSTEMLinearWavesParameterizedAngle +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMLinearWavesParameterizedAngle + * \brief Test the linear waves initial conditions as a parameterized test + * with varying angles. Details in Gardiner & Stone 2008 + * + */ +/// @{ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDSYSTEMLinearWavesParameterizedAngle : public ::testing::TestWithParam> +{ + public: + tMHDSYSTEMLinearWavesParameterizedAngle() : waveTest(false, true, false, false){}; + + protected: + system_test::SystemTestRunner waveTest; + inline static std::unordered_map high_res_l2norms; + + void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz, double const &pitch, double const &yaw, double const &domain, + int const &domain_direction, double const &vx = 0.0, size_t const &N = 32) + { + // Constant for all tests + double const gamma = 5. 
/ 3.; + double const tOut = 2 * domain / waveSpeed; + + // Define vector values + double x_len = domain, y_len = domain, z_len = domain; + int nx = N, ny = N, nz = N; + double vx_rot = vx, vy_rot = 0, vz_rot = 0; + double Bx_rot = 1, By_rot = 1.5, Bz_rot = 0; + + double rEigenVec_Bx_rot = rEigenVec_Bx; + double rEigenVec_By_rot = rEigenVec_By; + double rEigenVec_Bz_rot = rEigenVec_Bz; + + double rEigenVec_MomentumX_rot = rEigenVec_MomentumX; + double rEigenVec_MomentumY_rot = rEigenVec_MomentumY; + double rEigenVec_MomentumZ_rot = rEigenVec_MomentumZ; + + switch (domain_direction) { + case 1: + x_len *= 2; + nx *= 2; + break; + case 2: // swap X and Y + y_len *= 2; + ny *= 2; + break; + case 3: // swap X and Z + z_len *= 2; + nz *= 2; + break; + default: + throw std::invalid_argument("Invalid value of domain_direction given to Set_Launch_Params"); + break; + } + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + 
waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx_rot)); + waveTest.chollaLaunchParams.append(" vy=" + to_string_exact(vy_rot)); + waveTest.chollaLaunchParams.append(" vz=" + to_string_exact(vz_rot)); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=" + to_string_exact(Bx_rot)); + waveTest.chollaLaunchParams.append(" By=" + to_string_exact(By_rot)); + waveTest.chollaLaunchParams.append(" Bz=" + to_string_exact(Bz_rot)); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + to_string_exact(rEigenVec_MomentumY_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By_rot)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz_rot)); + waveTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); + waveTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + } +}; + +// Fast Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1. 
/ (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; // + for left wave + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + +// Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(6.11E-8, 5.5E-8); +#endif // PCM + + high_res_l2norms["fast_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {214, 204, 220}; + + double const prefix = 1. 
/ (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * -4; + double const rEigenVec_MomentumY = prefix * 2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + +// Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.2E-7, 5.4E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(6.5E-8, 6.5E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(6.1E-8, 5.5E-8); +#endif // PCM +} + +// Slow Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1. 
/ (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); +#endif // PCM + + high_res_l2norms["slow_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {854, 813, 880}; + + double const prefix = 1. 
/ (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.45E-9, 1.3E-9); +#endif // PCM +} + +// Alfven Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, 
domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); +#endif // PCM + + high_res_l2norms["alfven_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {427, 407, 440}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Check Results. 
Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.95e-09, 2.16e-09); +#endif // PCM +} + +// Contact Wave Moving Right +// =================================== +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {641, 620, 654}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, + velocityX); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + +// Check Results +// Check Results. 
Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(5.4E-7, 5.4E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(3.0E-8, 3.0E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.41e-09, 1.5E-09); +#endif // PCM + + high_res_l2norms["contact_" + std::to_string(domain_direction)] = waveTest.getL2Norm(); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, FastMagnetosonicWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 2.; + std::vector const numTimeSteps = {107, 102, 110}; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 2; + double const rEigenVec_MomentumX = prefix * 4; + double const rEigenVec_MomentumY = prefix * -2; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * 4; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 9; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, + 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(7.0E-8, 1.5E-7); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["fast_" + std::to_string(domain_direction)], + "", 0.2); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, SlowMagnetosonicWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 0.5; + std::vector const numTimeSteps = {427, 407, 440}; + 
+ double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, + 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(5.4E-8, 8.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["slow_" + std::to_string(domain_direction)], + "", 0.2); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, AlfvenWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {214, 204, 220}; + + double const rEigenVec_rho = 0; + double const rEigenVec_MomentumX = 0; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = -1; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 1; + double const rEigenVec_E = 0; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, 0.0, + 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave 
+ waveTest.runL1ErrorTest(4.5E-8, 8.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testing_utilities::Check_Results(4.0, low_res_l2norm / high_res_l2norms["alfven_" + std::to_string(domain_direction)], + "", 0.2); +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedAngle, MHDContactWaveExpectSecondOrderConvergence) +{ + // Get the test parameters + auto [pitch, yaw, domain, domain_direction] = GetParam(); + + // Specific to this test + double const waveSpeed = 1.0; + std::vector const numTimeSteps = {321, 310, 327}; + + double const rEigenVec_rho = 1; + double const rEigenVec_MomentumX = 1; + double const rEigenVec_MomentumY = 0; + double const rEigenVec_MomentumZ = 0; + double const rEigenVec_Bx = 0; + double const rEigenVec_By = 0; + double const rEigenVec_Bz = 0; + double const rEigenVec_E = 0.5; + double const velocityX = waveSpeed; + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz, pitch, yaw, domain, domain_direction, + velocityX, 16); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps[domain_direction - 1]); + + // Run the wave + waveTest.runL1ErrorTest(5.0E-8, 8.0E-8); + + // Check the scaling + double const low_res_l2norm = waveTest.getL2Norm(); + testing_utilities::Check_Results( + 4.0, low_res_l2norm / high_res_l2norms["contact_" + std::to_string(domain_direction)], "", 0.2); +} + +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedAngle, + ::testing::Values(std::make_tuple(0.0 * M_PI, 0.0 * M_PI, 0.5, 1), + std::make_tuple(0.0 * M_PI, 0.5 * M_PI, 0.5, 2), + std::make_tuple(0.5 * M_PI, 0.0 * M_PI, 0.5, 3) + // std::make_tuple(std::asin(2./3.), + // std::asin(2./std::sqrt(5.)), 1.5, 1) + )); +/// @} +// ============================================================================= + +// 
============================================================================= +// Test Suite: tMHDSYSTEMLinearWavesParameterizedMpi +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMLinearWavesParameterizedMpi + * \brief Test the linear waves initial conditions as a parameterized test + * with varying numbers of MPI ranks. Details in Gardiner & Stone 2008 + * + */ +/// @{ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDSYSTEMLinearWavesParameterizedMpi : public ::testing::TestWithParam +{ + public: + tMHDSYSTEMLinearWavesParameterizedMpi() : waveTest(false, true, false, false){}; + + protected: + system_test::SystemTestRunner waveTest; + + void Set_Launch_Params(double const &waveSpeed, double const &rEigenVec_rho, double const &rEigenVec_MomentumX, + double const &rEigenVec_MomentumY, double const &rEigenVec_MomentumZ, + double const &rEigenVec_E, double const &rEigenVec_Bx, double const &rEigenVec_By, + double const &rEigenVec_Bz) + { + // Constant for all tests + size_t const N = 32; + double const gamma = 5. 
/ 3.; + double const domain = 0.5; + double const tOut = 2 * domain / waveSpeed; + + // Settings + waveTest.chollaLaunchParams.append(" nx=" + to_string_exact(2 * N)); + waveTest.chollaLaunchParams.append(" ny=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" nz=" + to_string_exact(N)); + waveTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + waveTest.chollaLaunchParams.append(" init=Linear_Wave"); + waveTest.chollaLaunchParams.append(" xmin=0.0"); + waveTest.chollaLaunchParams.append(" ymin=0.0"); + waveTest.chollaLaunchParams.append(" zmin=0.0"); + waveTest.chollaLaunchParams.append(" xlen=" + to_string_exact(2 * domain)); + waveTest.chollaLaunchParams.append(" ylen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" zlen=" + to_string_exact(domain)); + waveTest.chollaLaunchParams.append(" xl_bcnd=1"); + waveTest.chollaLaunchParams.append(" xu_bcnd=1"); + waveTest.chollaLaunchParams.append(" yl_bcnd=1"); + waveTest.chollaLaunchParams.append(" yu_bcnd=1"); + waveTest.chollaLaunchParams.append(" zl_bcnd=1"); + waveTest.chollaLaunchParams.append(" zu_bcnd=1"); + waveTest.chollaLaunchParams.append(" rho=1.0"); + waveTest.chollaLaunchParams.append(" vx=0"); + waveTest.chollaLaunchParams.append(" vy=0"); + waveTest.chollaLaunchParams.append(" vz=0"); + waveTest.chollaLaunchParams.append(" P=" + to_string_exact(1 / gamma)); + waveTest.chollaLaunchParams.append(" Bx=1"); + waveTest.chollaLaunchParams.append(" By=1.5"); + waveTest.chollaLaunchParams.append(" Bz=0"); + waveTest.chollaLaunchParams.append(" A='1e-6'"); + waveTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + waveTest.chollaLaunchParams.append(" rEigenVec_rho=" + to_string_exact(rEigenVec_rho)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumX=" + to_string_exact(rEigenVec_MomentumX)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumY=" + 
to_string_exact(rEigenVec_MomentumY)); + waveTest.chollaLaunchParams.append(" rEigenVec_MomentumZ=" + to_string_exact(rEigenVec_MomentumZ)); + waveTest.chollaLaunchParams.append(" rEigenVec_E=" + to_string_exact(rEigenVec_E)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bx=" + to_string_exact(rEigenVec_Bx)); + waveTest.chollaLaunchParams.append(" rEigenVec_By=" + to_string_exact(rEigenVec_By)); + waveTest.chollaLaunchParams.append(" rEigenVec_Bz=" + to_string_exact(rEigenVec_Bz)); + } +}; + +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMLinearWavesParameterizedMpi, ::testing::Values(1, 2, 4)); + +// Slow Magnetosonic Waves Moving Left and Right +// ============================================= +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveRightMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * 2; + double const rEigenVec_MomentumY = prefix * 4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results. 
Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.75E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); +#endif // PCM +} + +TEST_P(tMHDSYSTEMLinearWavesParameterizedMpi, SlowMagnetosonicWaveLeftMovingCorrectInputExpectCorrectOutput) +{ + // Specific to this test + double const waveSpeed = 0.5; + int const numTimeSteps = 854; + + double const prefix = 1. / (2 * std::sqrt(5)); + double const rEigenVec_rho = prefix * 4; + double const rEigenVec_MomentumX = prefix * -2; + double const rEigenVec_MomentumY = prefix * -4; + double const rEigenVec_MomentumZ = prefix * 0; + double const rEigenVec_Bx = prefix * 0; + double const rEigenVec_By = prefix * -2; + double const rEigenVec_Bz = prefix * 0; + double const rEigenVec_E = prefix * 3; + + // Get the test parameters + waveTest.numMpiRanks = GetParam(); + + // Set the launch parameters + Set_Launch_Params(waveSpeed, rEigenVec_rho, rEigenVec_MomentumX, rEigenVec_MomentumY, rEigenVec_MomentumZ, + rEigenVec_E, rEigenVec_Bx, rEigenVec_By, rEigenVec_Bz); + + // Set the number of timesteps + waveTest.setFiducialNumTimeSteps(numTimeSteps); + + // Check Results. Values based on results in Gardiner & Stone 2008 +#ifdef PCM + waveTest.runL1ErrorTest(4.E-7, 4.E-7); +#elif defined(PLMC) + waveTest.runL1ErrorTest(2.0E-8, 2.8E-8); +#elif defined(PPMC) + waveTest.runL1ErrorTest(1.4E-9, 1.3E-9); +#endif // PCM +} + +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMParameterizedMpi +// ============================================================================= +/*! 
+ * \defgroup tMHDSYSTEMParameterizedMpi + * \brief Test initial conditions as a parameterized test with varying numbers of MPI ranks + * + */ +/// @{ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDSYSTEMParameterizedMpi : public ::testing::TestWithParam +{ + protected: + system_test::SystemTestRunner test_runner; +}; +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMParameterizedMpi, ::testing::Values(1, 2, 4)); + +/// Test constant state with all magnetic fields set to zero +TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithZeroMagneticFieldCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test constant state with all magnetic fields set to one +TEST_P(tMHDSYSTEMParameterizedMpi, ConstantWithMagneticFieldCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the MHD Einfeldt Strong Rarefaction (Einfeldt et al. 1991) +TEST_P(tMHDSYSTEMParameterizedMpi, EinfeldtStrongRarefactionCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Brio & Wu Shock Tube (Brio & Wu 1988) +TEST_P(tMHDSYSTEMParameterizedMpi, BrioAndWuShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Dai & Woodward Shock Tube (Dai & Woodward 1998) +TEST_P(tMHDSYSTEMParameterizedMpi, DaiAndWoodwardShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Ryu & Jones 1a Shock Tube (Ryu & Jones 1995) +TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones1aShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} + +/// Test the Ryu & Jones 4d Shock Tube (Ryu & Jones 1995) +TEST_P(tMHDSYSTEMParameterizedMpi, RyuAndJones4dShockTubeCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + // This test is 
particularly sensitive to minor changes in the initial conditions, the kind of changes that are + // expected from compiler to compiler. As such the limits have been loosened slightly. + test_runner.setFixedEpsilon(7.3E-12); + test_runner.runTest(); +} + +/// Test the Advecting Field Loop +TEST_P(tMHDSYSTEMParameterizedMpi, AdvectingFieldLoopCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + + // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems + test_runner.runTest(true, 3.9E-8, 2.25E-6); +} + +/// Test the MHD Blast Wave +TEST_P(tMHDSYSTEMParameterizedMpi, MhdBlastWaveCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + + // Only do the L2 Norm test. The regular cell-to-cell comparison is brittle for this test across systems + test_runner.runTest(true, 2.2E-4, 0.35); +} + +/// Test the Orszag-Tang Vortex +TEST_P(tMHDSYSTEMParameterizedMpi, OrszagTangVortexCorrectInputExpectCorrectOutput) +{ + test_runner.numMpiRanks = GetParam(); + test_runner.runTest(); +} +/// @} +// ============================================================================= + +// ============================================================================= +// Test Suite: tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization +// ============================================================================= +/*! + * \defgroup tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization + * \brief Test the circularly polarized Alfven Wave conditions as a parameterized test with varying polarizations. 
+ * Details in Gardiner & Stone 2008 + * + */ +/// @{ +// NOLINTNEXTLINE(readability-identifier-naming) +class tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization : public ::testing::TestWithParam +{ + public: + tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization() : cpawTest(false, true, false, false){}; + + protected: + system_test::SystemTestRunner cpawTest; + + void Set_Launch_Params(double const &polarization, double const &vx) + { + // Constant for all tests + size_t const N = 32; + double const length = 1.5; + double const gamma = 5. / 3.; + double const tOut = 1.0; + double const pitch = std::asin(2. / 3.); + double const yaw = std::asin(2. / std::sqrt(5.)); + + // Domain settings + double const x_len = 2. * length, y_len = length, z_len = length; + int const nx = 2 * N, ny = N, nz = N; + + // Settings + cpawTest.chollaLaunchParams.append(" nx=" + to_string_exact(nx)); + cpawTest.chollaLaunchParams.append(" ny=" + to_string_exact(ny)); + cpawTest.chollaLaunchParams.append(" nz=" + to_string_exact(nz)); + cpawTest.chollaLaunchParams.append(" tout=" + to_string_exact(tOut)); + cpawTest.chollaLaunchParams.append(" outstep=" + to_string_exact(tOut)); + cpawTest.chollaLaunchParams.append(" init=Circularly_Polarized_Alfven_Wave"); + cpawTest.chollaLaunchParams.append(" xmin=0.0"); + cpawTest.chollaLaunchParams.append(" ymin=0.0"); + cpawTest.chollaLaunchParams.append(" zmin=0.0"); + cpawTest.chollaLaunchParams.append(" xlen=" + to_string_exact(x_len)); + cpawTest.chollaLaunchParams.append(" ylen=" + to_string_exact(y_len)); + cpawTest.chollaLaunchParams.append(" zlen=" + to_string_exact(z_len)); + cpawTest.chollaLaunchParams.append(" xl_bcnd=1"); + cpawTest.chollaLaunchParams.append(" xu_bcnd=1"); + cpawTest.chollaLaunchParams.append(" yl_bcnd=1"); + cpawTest.chollaLaunchParams.append(" yu_bcnd=1"); + cpawTest.chollaLaunchParams.append(" zl_bcnd=1"); + cpawTest.chollaLaunchParams.append(" zu_bcnd=1"); + 
cpawTest.chollaLaunchParams.append(" polarization=" + to_string_exact(polarization)); + cpawTest.chollaLaunchParams.append(" vx=" + to_string_exact(vx)); + cpawTest.chollaLaunchParams.append(" gamma=" + to_string_exact(gamma)); + cpawTest.chollaLaunchParams.append(" pitch=" + to_string_exact(pitch)); + cpawTest.chollaLaunchParams.append(" yaw=" + to_string_exact(yaw)); + } +}; + +// Moving wave with right and left polarization +// ============================================= +TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, MovingWaveCorrectInputExpectCorrectOutput) +{ + // Get the test parameter + double const polarization = GetParam(); + + // Set the wave to be moving + double const vx = 0.0; + +// Set allowed errors +#ifdef PCM + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(82); + double const allowedL1Error = 6.5E-2; // Based on results in Gardiner & Stone 2008 + double const allowedError = 4.6E-2; +#elif defined(PLMC) + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(84); + double const allowedL1Error = 5.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 5.0E-3; +#elif defined(PPMC) + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(84); + double const allowedL1Error = 4.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 3.0E-3; +#elif defined(PLMP) + double const allowedL1Error = 5.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 5.0E-3; +#elif defined(PPMP) + double const allowedL1Error = 4.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 3.0E-3; +#endif + + // Set the launch parameters + Set_Launch_Params(polarization, vx); + + // Check Results + cpawTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +// Standing wave with right and left polarization +// ============================================= 
+TEST_P(tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, StandingWaveCorrectInputExpectCorrectOutput) +{ + // Get the test parameter + double const polarization = GetParam(); + + // Set the wave to be standing + double const vx = -polarization; + +// Set allowed errors +#ifdef PCM + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(130); + double const allowedL1Error = 1.8E-2; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1.7E-2; +#elif defined(PLMC) + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(130); + double const allowedL1Error = 2.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 2.0E-3; +#elif defined(PPMC) + // Set the number of timesteps + cpawTest.setFiducialNumTimeSteps(130); + double const allowedL1Error = 1.3E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1.3E-3; +#elif defined(PLMP) + double const allowedL1Error = 2.0E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 2.0E-3; +#elif defined(PPMP) + double const allowedL1Error = 1.3E-3; // Based on results in Gardiner & Stone 2008 + double const allowedError = 1.3E-3; +#endif + + // Set the launch parameters + Set_Launch_Params(polarization, vx); + + // Check Results + cpawTest.runL1ErrorTest(allowedL1Error, allowedError); +} + +INSTANTIATE_TEST_SUITE_P(, tMHDSYSTEMCircularlyPolarizedAlfvenWaveParameterizedPolarization, + ::testing::Values(1.0, -1.0)); +/// @} +// ============================================================================= diff --git a/src/system_tests/particles_system_tests.cpp b/src/system_tests/particles_system_tests.cpp index 7f6d4552e..4b6b36575 100644 --- a/src/system_tests/particles_system_tests.cpp +++ b/src/system_tests/particles_system_tests.cpp @@ -20,11 +20,10 @@ * */ /// @{ -TEST(tPARTICLESSYSTEMSphericalCollapse, - CorrectInputExpectCorrectOutput) +TEST(tPARTICLESSYSTEMSphericalCollapse, 
DISABLED_CorrectInputExpectCorrectOutput) { - systemTest::SystemTestRunner collapseTest(true); - collapseTest.runTest(); + system_test::SystemTestRunner collapseTest(true); + collapseTest.runTest(); } /// @} // ============================================================================= \ No newline at end of file diff --git a/src/system_tests/system_tester.cpp b/src/system_tests/system_tester.cpp index c59e6e770..1888fd752 100644 --- a/src/system_tests/system_tester.cpp +++ b/src/system_tests/system_tester.cpp @@ -7,19 +7,23 @@ // STL includes #include -#include -#include -#include -#include + #include #include +#include +#include +#include +#include #include +#include +#include // External Libraries and Headers #include // Local includes -#include "../system_tests/system_tester.h" // Include the header file +#include "../io/io.h" +#include "../system_tests/system_tester.h" // Include the header file #include "../utils/testing_utilities.h" // ============================================================================= @@ -27,363 +31,473 @@ // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::runTest() +void system_test::SystemTestRunner::runTest(bool const &compute_L2_norm_only, double const &maxAllowedL1Error, + double const &maxAllowedError) { - /// Only run if this variable is set to `true`. Generally this and - /// globalCompareSystemTestResults should only be used for large MPI / tests - /// where the user wishes to separate the execution of cholla and the / - /// comparison of results onto different machines/jobs - if (globalRunCholla) - { - // Launch Cholla. Note that this dumps all console output to the console - // log file as requested by the user. - launchCholla(); + /// Only run if this variable is set to `true`. 
Generally this and + /// globalCompareSystemTestResults should only be used for large MPI / tests + /// where the user wishes to separate the execution of cholla and the / + /// comparison of results onto different machines/jobs + if (globalRunCholla) { + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. + launchCholla(); + } + + /// If set to false then no comparison will be performed. Generally this and + /// globalRunCholla should only be used for large MPI tests where the user + /// wishes to separate the execution of cholla and the comparison of results + /// onto different machines/jobs + if (not globalCompareSystemTestResults) { + return; + } + + // Make sure we have all the required data files and open the test data file + _testHydroFieldsFileVec.resize(numMpiRanks); + _testParticlesFileVec.resize(numMpiRanks); + FnameTemplate fname_template(true, _outputDirectory); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { + // Load the hydro data + std::string filePath = fname_template.format_fname(1, fileIndex, ""); + if (_hydroDataExists and std::filesystem::exists(filePath)) { + _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } - /// If set to false then no comparison will be performed. Generally this and - /// globalRunCholla should only be used for large MPI tests where the user - /// wishes to separate the execution of cholla and the comparison of results - /// onto different machines/jobs - if (not globalCompareSystemTestResults) return; - - // Make sure we have all the required data files and open the test data file - _testHydroFieldsFileVec.resize(numMpiRanks); - _testParticlesFileVec.resize(numMpiRanks); - for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) - { - // Load the hydro data - if (_hydroDataExists) - { - std::string fileName = "/1.h5." 
+ std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } - - // Load the particles data - if (_particleDataExists) - { - std::string fileName = "/1_particles.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testParticlesFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } + // Load the particles data + filePath = fname_template.format_fname(1, fileIndex, "_particles"); + if (_particleDataExists and std::filesystem::exists(filePath)) { + _testParticlesFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } + } - // If this is a particle build then read in the IDs and generate the sorting - // vector - if (_particleDataExists) - { - _testParticleIDs = _loadTestParticleData("particle_IDs"); + // If this is a particle build then read in the IDs and generate the sorting + // vector + if (_particleDataExists) { + _testParticleIDs = _loadTestParticleData("particle_IDs"); - if (_fiducialFileExists) _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); + if (_fiducialFileExists) { + _fiducialParticleIDs = _loadFiducialParticleData("particle_IDs"); } - - // Get the list of test dataset names - if (_hydroDataExists) - _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); - if (_particleDataExists) - { - // Load the data, replace the density value with the new name, then append - std::vector particleNames = _findDataSetNames(_testParticlesFileVec[0]); - auto iter = std::find(particleNames.begin(), particleNames.end(), "density"); - *iter = "particle_density"; - - _testDataSetNames.insert(_testDataSetNames.end(), - particleNames.begin(), - particleNames.end()); + } + + // Get the list of test dataset names + if (_hydroDataExists) { + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + } + if (_particleDataExists) { + // Load the data, replace the density 
value with the new name, then append + std::vector particleNames = _findDataSetNames(_testParticlesFileVec[0]); + auto iter = std::find(particleNames.begin(), particleNames.end(), "density"); + *iter = "particle_density"; + + _testDataSetNames.insert(_testDataSetNames.end(), particleNames.begin(), particleNames.end()); + } + + // Start Performing Checks + // ======================= + // Check the number of time steps + if (_compareNumTimeSteps) { + _checkNumTimeSteps(); + } + + // Check that the test file has as many, or more, datasets than the fiducial + // file. Provide a warning if the datasets are not the same size + EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) + << std::endl + << "Warning: The test data has " << _testDataSetNames.size() << " datasets and the fiducial data has " + << _fiducialDataSetNames.size() << " datasets" << std::endl + << std::endl; + + // Compute the L1 Error. + L2Norm_ = 0; + double maxError = 0; + // Loop over the datasets to be tested + for (auto const &dataSetName : _fiducialDataSetNames) { + // check that the test data has the dataset in it + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) + << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once."; + + // Get data vectors + std::vector testDims(3, 1); + std::vector testData; + std::vector fiducialData; + // This is just a vector of all the different dataset names for + // particles to help choose whether to call _loadTestParticleData + // or loadTestFieldData + std::vector particleIDs = {"particle_IDs", "pos_x", "pos_y", "pos_z", "vel_x", "vel_y", "vel_z"}; + if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) != particleIDs.end()) { + // This is a particle data set + + // Set some basic parameters + testDims[0] = _testTotalNumParticles; + + // Load in the data. 
Note the special handling for particle_IDs + if (dataSetName == "particle_IDs") { + testData = _testParticleIDs; + fiducialData = _fiducialParticleIDs; + } else { + testData = _loadTestParticleData(dataSetName); + fiducialData = _loadFiducialParticleData(dataSetName); + } + } else { + // This is a field data set + testData = loadTestFieldData(dataSetName, testDims); + // Get fiducial data + fiducialData = _loadFiducialFieldData(dataSetName); } - // Start Performing Checks - // ======================= - // Check the number of time steps - if (_compareNumTimeSteps) _checkNumTimeSteps(); - - // Check that the test file has as many, or more, datasets than the fiducial - // file. Provide a warning if the datasets are not the same size - EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) - << std::endl - << "Warning: The test data has " - << _testDataSetNames.size() - << " datasets and the fiducial data has " - << _fiducialDataSetNames.size() - << " datasets" << std::endl << std::endl; - - // Loop over the datasets to be tested - for (auto dataSetName: _fiducialDataSetNames) - { - // check that the test data has the dataset in it - ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) - << "The test data does not contain the dataset '" + dataSetName - + "' or contains it more than once."; - - // Get data vectors - std::vector testDims(3,1); - std::vector testData; - std::vector fiducialData; - // This is just a vector of all the different dataset names for - // particles to help choose whether to call _loadTestParticleData - // or loadTestFieldData - std::vector particleIDs = {"particle_IDs", - "pos_x", - "pos_y", - "pos_z", - "vel_x", - "vel_y", - "vel_z"}; - if (std::find(particleIDs.begin(), particleIDs.end(), dataSetName) - != particleIDs.end()) - { - // This is a particle data set - - // Set some basic parameters - testDims[0] = _testTotalNumParticles; - - // Load in the data. 
Note the special handling for particle_IDs - if (dataSetName == "particle_IDs") - { - testData = _testParticleIDs; - fiducialData = _fiducialParticleIDs; - } - else - { - testData = _loadTestParticleData(dataSetName); - fiducialData = _loadFiducialParticleData(dataSetName); - } - } - else - { - // This is a field data set - testData = loadTestFieldData(dataSetName, - testDims); - // Get fiducial data - fiducialData = _loadFiducialFieldData(dataSetName); + // Check that they're the same length + ASSERT_EQ(fiducialData.size(), testData.size()) + << "The fiducial and test '" << dataSetName << "' datasets are not the same length"; + + // Compare values + double L1_error = 0.0; + double fp_sum_error = 0.0; + for (size_t i = 0; i < testDims[0]; i++) { + for (size_t j = 0; j < testDims[1]; j++) { + for (size_t k = 0; k < testDims[2]; k++) { + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + if (compute_L2_norm_only) { + double const diff = std::abs(fiducialData.at(index) - testData.at(index)); + + maxError = std::max(maxError, diff); + + // Perform a Kahan sum to maintain precision in the result + double const y = diff - fp_sum_error; + double const t = L1_error + y; + fp_sum_error = (t - L1_error) - y; + L1_error = t; + } else { + // Check for equality and iff not equal return difference + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual = testing_utilities::nearlyEqualDbl(fiducialData.at(index), testData.at(index), absoluteDiff, + ulpsDiff, _fixedEpsilon); + ASSERT_TRUE(areEqual) << std::endl + << "Difference in " << dataSetName << " dataset at [" << i << "," << j << "," << k + << "]" << std::endl + << "The fiducial value is: " << fiducialData[index] << std::endl + << "The test value is: " << testData[index] << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + } } + } + } - // Check that they're the same length - ASSERT_EQ(fiducialData.size(), 
testData.size()) - << "The fiducial and test '" - << dataSetName - << "' datasets are not the same length"; - - // Compare values - for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - // Check for equality and iff not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - // Fixed epsilon is changed from the default since AMD/Clang - // appear to differ from NVIDIA/GCC/XL by roughly 1E-12 - double fixedEpsilon = 5.0E-12; - bool areEqual = testingUtilities::nearlyEqualDbl(fiducialData.at(index), - testData.at(index), - absoluteDiff, - ulpsDiff, - fixedEpsilon); - ASSERT_TRUE(areEqual) - << std::endl - << "Difference in " - << dataSetName - << " dataset at [" - << i << "," << j << "," << k <<"]" << std::endl - << "The fiducial value is: " << fiducialData[index] << std::endl - << "The test value is: " << testData[index] << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - } - } + if (compute_L2_norm_only) { + L1_error /= static_cast(testDims[0] * testDims[1] * testDims[2]); + L2Norm_ += L1_error * L1_error; } + } + + if (compute_L2_norm_only) { + // Check the L2 Norm + L2Norm_ = std::sqrt(L2Norm_); + EXPECT_LT(L2Norm_, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; + } } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::launchCholla() +void system_test::SystemTestRunner::runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError) { + /// Only run if this variable is set to `true`. 
Generally this and + /// globalCompareSystemTestResults should only be used for large MPI / tests + /// where the user wishes to separate the execution of cholla and the / + /// comparison of results onto different machines/jobs + if (globalRunCholla) { // Launch Cholla. Note that this dumps all console output to the console // log file as requested by the user. - std::string const chollaRunCommand = globalMpiLauncher.getString() + " " - + std::to_string(numMpiRanks) + " " - + _chollaPath + " " - + _chollaSettingsPath + " " - + chollaLaunchParams + " " - + "outdir=" + _outputDirectory + "/" - + " >> " + _consoleOutputPath + " 2>&1 "; - auto returnEcho = system(("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath).c_str()); - auto returnLaunch = system((chollaRunCommand).c_str()); - EXPECT_EQ(returnEcho, 0) - << "Warning: Echoing the launch command to the console output file " - << "returned a non-zero exit status code. Launch command is `" - << chollaRunCommand << "`" << std::endl; - EXPECT_EQ(returnLaunch, 0) - << "Warning: Launching Cholla returned a non-zero exit status. Likely " - << "failed to launch. Please see the log files" << std::endl; - - _safeMove("run_output.log", _outputDirectory); - // TODO: instead of commenting out, change to check if exist - //_safeMove("run_timing.log", _outputDirectory); + launchCholla(); + } + + // Check that there is hydro data and no particle data + if (_particleDataExists) { + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest does not support particles"; + throw std::runtime_error(errMessage); + } + if (not _hydroDataExists) { + std::string errMessage = "Error: SystemTestRunner::runL1ErrorTest requires hydro data"; + throw std::runtime_error(errMessage); + } + + /// If set to false then no comparison will be performed. 
Generally this and + /// globalRunCholla should only be used for large MPI tests where the user + /// wishes to separate the execution of cholla and the comparison of results + /// onto different machines/jobs + if (not globalCompareSystemTestResults) { + return; + } + + // Make sure we have all the required data files and open the data files + _testHydroFieldsFileVec.resize(numMpiRanks); + std::vector initialHydroFieldsFileVec(numMpiRanks); + FnameTemplate fname_template(true, _outputDirectory); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { + // Initial time data + std::string filePath = fname_template.format_fname(0, fileIndex, ""); + if (std::filesystem::exists(filePath)) { + initialHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); + } + + // Final time data + filePath = fname_template.format_fname(1, fileIndex, ""); + if (std::filesystem::exists(filePath)) { + _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); + } + } + + // Get the list of test dataset names + _fiducialDataSetNames = _findDataSetNames(initialHydroFieldsFileVec[0]); + _testDataSetNames = _findDataSetNames(_testHydroFieldsFileVec[0]); + + // Start Performing Checks + // ======================= + // Check the number of time steps + if (_compareNumTimeSteps) { + _checkNumTimeSteps(); + } + + // Check that the test file has as many, or more, datasets than the fiducial + // file. 
Provide a warning if the datasets are not the same size + EXPECT_GE(_testDataSetNames.size(), _fiducialDataSetNames.size()) + << std::endl + << "Warning: The test data has " << _testDataSetNames.size() << " datasets and the fiducial data has " + << _fiducialDataSetNames.size() << " datasets" << std::endl + << std::endl; + + // Loop over the datasets to be tested + L2Norm_ = 0; + double maxError = 0; + for (auto const &dataSetName : _fiducialDataSetNames) { + if (dataSetName == "GasEnergy") { + continue; + } + + // check that the test data has the dataset in it + ASSERT_EQ(std::count(_testDataSetNames.begin(), _testDataSetNames.end(), dataSetName), 1) + << "The test data does not contain the dataset '" + dataSetName + "' or contains it more than once."; + + // Get data vectors + std::vector initialDims(3, 1); + std::vector initialData; + std::vector finalDims(3, 1); + std::vector finalData; + + // This is a field data set + initialData = loadTestFieldData(dataSetName, initialDims, initialHydroFieldsFileVec); + // Get fiducial data + finalData = loadTestFieldData(dataSetName, finalDims, _testHydroFieldsFileVec); + + // Check that they're the same length + ASSERT_EQ(initialData.size(), finalData.size()) + << "The initial and final '" << dataSetName << "' datasets are not the same length"; + + // Compute the L1 Error. 
+ double L1_error = 0.0; + double fp_sum_error = 0.0; + for (size_t i = 0; i < initialData.size(); i++) { + double const diff = std::abs(initialData.at(i) - finalData.at(i)); + + maxError = std::max(maxError, diff); + + // Perform a Kahan sum to maintain precision in the result + double const y = diff - fp_sum_error; + double const t = L1_error + y; + fp_sum_error = (t - L1_error) - y; + L1_error = t; + } + + L1_error /= static_cast(initialDims[0] * initialDims[1] * initialDims[2]); + L2Norm_ += L1_error * L1_error; + + // Perform the correctness check + EXPECT_LT(L1_error, maxAllowedL1Error) + << "the L1 error for the " << dataSetName << " data has exceeded the allowed value"; + } + + // Check the L2 Norm + L2Norm_ = std::sqrt(L2Norm_); + EXPECT_LT(L2Norm_, maxAllowedL1Error) << "the norm of the L1 error vector has exceeded the allowed value"; + + // Check the Max Error + EXPECT_LT(maxError, maxAllowedError) << "The maximum error has exceeded the allowed value"; } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::openHydroTestData() +void system_test::SystemTestRunner::launchCholla() { - _testHydroFieldsFileVec.resize(numMpiRanks); - for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) - { - std::string fileName = "/1.h5." + std::to_string(fileIndex); - _checkFileExists(_outputDirectory + fileName); - _testHydroFieldsFileVec[fileIndex].openFile(_outputDirectory + fileName, - H5F_ACC_RDONLY); - } + // Launch Cholla. Note that this dumps all console output to the console + // log file as requested by the user. 
+ std::string const chollaRunCommand = globalMpiLauncher.getString() + " " + std::to_string(numMpiRanks) + " " + + _chollaPath + " " + _chollaSettingsPath + " " + chollaLaunchParams + " " + + "outdir=" + _outputDirectory + "/" + " >> " + _consoleOutputPath + " 2>&1 "; + auto returnEcho = system(("echo Launch Command: " + chollaRunCommand + " >> " + _consoleOutputPath).c_str()); + auto returnLaunch = system((chollaRunCommand).c_str()); + EXPECT_EQ(returnEcho, 0) << "Warning: Echoing the launch command to the console output file " + << "returned a non-zero exit status code. Launch command is `" << chollaRunCommand << "`" + << std::endl; + EXPECT_EQ(returnLaunch, 0) << "Warning: Launching Cholla returned a non-zero exit status. Likely " + << "failed to launch. Please see the log files" << std::endl; + + // Move the output files to the correct spots + std::filesystem::rename(::globalChollaRoot.getString() + "/run_output.log", _outputDirectory + "/run_output.log"); + try { + std::filesystem::rename(::globalChollaRoot.getString() + "/run_timing.log", _outputDirectory + "/run_timing.log"); + } catch (const std::filesystem::filesystem_error &error) { + // This file might not exist and isn't required so don't worry if it doesn't exist + } } // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::setFiducialData(std::string const &fieldName, - std::vector const &dataVec) +void system_test::SystemTestRunner::openHydroTestData() { - // First check if there's a fiducial data file - if (_fiducialFileExists) - { - std::string errMessage = "Error: Fiducial data file already exists for test '" - + _fullTestFileName - + "' and cannot be overwritten."; - throw std::runtime_error(errMessage); + _testHydroFieldsFileVec.resize(numMpiRanks); + for (size_t fileIndex = 0; fileIndex < numMpiRanks; fileIndex++) { + std::string filePath = 
FnameTemplate(true, _outputDirectory).format_fname(1, fileIndex, ""); + if (std::filesystem::exists(filePath)) { + _testHydroFieldsFileVec[fileIndex].openFile(filePath, H5F_ACC_RDONLY); } + } +} +// ============================================================================= - // Put new vector into map - _fiducialDataSets[fieldName] = dataVec; +// ============================================================================= +void system_test::SystemTestRunner::setFiducialData(std::string const &fieldName, std::vector const &dataVec) +{ + // First check if there's a fiducial data file + if (_fiducialDataSets.count(fieldName) > 0) { + std::string errMessage = + "Error: Fiducial dataset for field '" + fieldName + "' already exists and cannot be overwritten"; + throw std::runtime_error(errMessage); + } + + // Put new vector into map + _fiducialDataSets[fieldName] = dataVec; } // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::generateConstantData( - double const &value, - size_t const &nx, - size_t const &ny, - size_t const &nz) +std::vector system_test::SystemTestRunner::generateConstantData(double const &value, size_t const &nx, + size_t const &ny, size_t const &nz) { - size_t const length = nx*ny*nz; - std::vector outVec(length); - for (size_t i = 0; i < length; i++) - { - outVec[i] = value; - } - return outVec; + size_t const length = nx * ny * nz; + std::vector outVec(length); + for (size_t i = 0; i < length; i++) { + outVec[i] = value; + } + return outVec; } // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::generateSineData( - double const &offset, - double const &litude, - double const &kx, - double const &ky, - double const &kz, - double const &phase, - 
size_t const &nx, - size_t const &ny, - size_t const &nz) +std::vector system_test::SystemTestRunner::generateSineData(double const &offset, double const &litude, + double const &kx, double const &ky, + double const &kz, double const &phase, + size_t const &nx, size_t const &ny, + size_t const &nz) { - size_t const length = nx*ny*nz; - std::vector outVec(length); - for (size_t i = 0; i < nx; i++) - { - for (size_t j = 0; j < ny; j++) - { - for (size_t k = 0; k < nz; k++) - { - double value = offset + amplitude - * std::sin(kx*i + ky*j + kz*k + phase); - - size_t index = (i * ny * nz) + (j * nz) + k; - outVec[index] = value; - } - } + size_t const length = nx * ny * nz; + std::vector outVec(length); + for (size_t i = 0; i < nx; i++) { + for (size_t j = 0; j < ny; j++) { + for (size_t k = 0; k < nz; k++) { + double value = offset + amplitude * std::sin(kx * i + ky * j + kz * k + phase); + + size_t index = (i * ny * nz) + (j * nz) + k; + outVec[index] = value; + } } - return outVec; + } + return outVec; } // ============================================================================= // ============================================================================= // Constructor -systemTest::SystemTestRunner::SystemTestRunner(bool const &particleData, - bool const &hydroData, - bool const &useFiducialFile, - bool const &useSettingsFile) - : - _particleDataExists(particleData), - _hydroDataExists(hydroData) +system_test::SystemTestRunner::SystemTestRunner(bool const &particleData, bool const &hydroData, + bool const &useFiducialFile, bool const &useSettingsFile) + : _particleDataExists(particleData), _hydroDataExists(hydroData) { - // Get the test name, with and underscore instead of a "." 
since - // we're actually generating file names - const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); - std::stringstream nameStream; - std::string suiteName = test_info->test_suite_name(); - suiteName = suiteName.substr(suiteName.find("/")+1, suiteName.length()); - nameStream << suiteName << "_" << test_info->name(); - std::string fullTestName = nameStream.str(); - _fullTestFileName = fullTestName.substr(0, fullTestName.find("/")); - - // Generate the input paths. Strip out everything after a "/" since that - // probably indicates a parameterized test - _chollaPath = ::globalChollaRoot.getString() - + "/bin/cholla." - + ::globalChollaBuild.getString() - + "." + ::globalChollaMachine.getString(); - _chollaSettingsPath = ::globalChollaRoot.getString() - + "/src/system_tests/input_files/" - + _fullTestFileName + ".txt"; - _fiducialFilePath = ::globalChollaRoot.getString() - + "/cholla-tests-data/system_tests/" - + _fullTestFileName + ".h5"; - - // Generate output paths, these files don't exist yet - _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; - _consoleOutputPath = _outputDirectory + "/" + _fullTestFileName + "_console.log"; - - // Create the new directory and check that it exists - // TODO: C++17: When we update to C++17 or newer this section should - // TODO: use std::filesystem to create the directory and check that - // TODO: it exists - if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0) - { - std::cerr << "Warning: Directory '" - + _outputDirectory - + "' either already exists or could not be created." - << std::endl; + // Get the test name, with and underscore instead of a "." 
since + // we're actually generating file names + const ::testing::TestInfo *const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); + std::stringstream nameStream; + std::string suiteName = test_info->test_suite_name(); + suiteName = suiteName.substr(suiteName.find('/') + 1, suiteName.length()); + nameStream << suiteName << "_" << test_info->name(); + std::string fullTestName = nameStream.str(); + _fullTestFileName = fullTestName.substr(0, fullTestName.find('/')); + + // Generate the input paths. Strip out everything after a "/" since that + // probably indicates a parameterized test. + _chollaPath = ::globalChollaRoot.getString() + "/bin/cholla." + ::globalChollaBuild.getString() + "." + + ::globalChollaMachine.getString(); + + // Check that Cholla exists and abort if it doesn't + if (not std::filesystem::exists(_chollaPath)) { + throw std::invalid_argument("Error: Cholla executable not found."); + } + + // Check that settings file exist + if (useSettingsFile) { + _chollaSettingsPath = + ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + _fullTestFileName + ".txt"; + } else { + _chollaSettingsPath = ::globalChollaRoot.getString() + "/src/system_tests/input_files/" + "blank_settings_file.txt"; + } + if (not std::filesystem::exists(_chollaSettingsPath)) { + throw std::invalid_argument("Error: Cholla settings file not found at :" + _chollaSettingsPath); + } + + // Check that the fiducial file exists and load it if it does + if (useFiducialFile) { + _fiducialFilePath = ::globalChollaRoot.getString() + "/cholla-tests-data/system_tests/" + _fullTestFileName + ".h5"; + if (not std::filesystem::exists(_fiducialFilePath)) { + throw std::invalid_argument("Error: Cholla fiducial data file not found at :" + _fiducialFilePath); } - - // Check that the files exist and load fiducial HDF5 file if required - _checkFileExists(_chollaPath); - if (useSettingsFile) _checkFileExists(_chollaSettingsPath); - if (useFiducialFile) - { - 
_checkFileExists(_fiducialFilePath); - _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); - _fiducialDataSetNames = _findDataSetNames(_fiducialFile); - _fiducialFileExists = true; - }; + _fiducialFile.openFile(_fiducialFilePath, H5F_ACC_RDONLY); + _fiducialDataSetNames = _findDataSetNames(_fiducialFile); + _fiducialFileExists = true; + } else { + _fiducialFilePath = ""; + } + + // Generate output paths, these files don't exist yet + _outputDirectory = ::globalChollaRoot.getString() + "/bin/" + fullTestName; + _consoleOutputPath = _outputDirectory + "/" + _fullTestFileName + "_console.log"; + + // Create the new directory and check that it exists + // TODO: C++17: When we update to C++17 or newer this section should + // TODO: use std::filesystem to create the directory and check that + // TODO: it exists + if (system(("mkdir --parents " + _outputDirectory).c_str()) != 0) { + std::cerr << "Warning: Directory '" + _outputDirectory + "' either already exists or could not be created." 
+ << std::endl; + } } // ============================================================================= // ============================================================================= // Destructor -systemTest::SystemTestRunner::~SystemTestRunner() +system_test::SystemTestRunner::~SystemTestRunner() { - _fiducialFile.close(); - for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) - { - if (_hydroDataExists) _testHydroFieldsFileVec[i].close(); - if (_particleDataExists) _testParticlesFileVec[i].close(); + _fiducialFile.close(); + for (size_t i = 0; i < _testHydroFieldsFileVec.size(); i++) { + if (_hydroDataExists) { + _testHydroFieldsFileVec[i].close(); + } + if (_particleDataExists) { + _testParticlesFileVec[i].close(); } + } } // ============================================================================= @@ -392,331 +506,270 @@ systemTest::SystemTestRunner::~SystemTestRunner() // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::_checkFileExists(std::string const &filePath) +void system_test::SystemTestRunner::_checkNumTimeSteps() { - // TODO C++17 std::filesystem does this better - std::fstream file; - file.open(filePath); - if (not file) - { - std::string errMessage = "Error: File '" + filePath + "' not found."; - throw std::invalid_argument(errMessage); - } -} + int fiducialNSteps, testNSteps; + + H5::Attribute tStepAttr; + if (_hydroDataExists) { + tStepAttr = _testHydroFieldsFileVec[0].openAttribute("n_step"); + } else if (_particleDataExists) { + tStepAttr = _testParticlesFileVec[0].openAttribute("n_step"); + } else { + std::string errMessage = "Error: Both hydro and particle data are turned off."; + throw std::invalid_argument(errMessage); + } + + tStepAttr.read(H5::PredType::NATIVE_INT, &testNSteps); + + if (_fiducialFileExists) { + tStepAttr = _fiducialFile.openAttribute("n_step"); + 
tStepAttr.read(H5::PredType::NATIVE_INT, &fiducialNSteps); + } else { + fiducialNSteps = _numFiducialTimeSteps; + } + + EXPECT_EQ(fiducialNSteps, testNSteps) << "The number of time steps is not equal"; +}; // ============================================================================= // ============================================================================= -void systemTest::SystemTestRunner::_safeMove(std::string const &sourcePath, - std::string const &destinationDirectory) +std::vector system_test::SystemTestRunner::loadTestFieldData(std::string dataSetName, + std::vector &testDims, + std::vector file) { - // TODO C++17 std::filesystem does this better - _checkFileExists(sourcePath); - if(std::rename(sourcePath.c_str(), (destinationDirectory + "/" + sourcePath).c_str()) < 0) - { - std::string errMessage = "Error: File '" - + sourcePath - + "' could not be moved to '" - + destinationDirectory - + "`"; - throw std::invalid_argument(errMessage); + // Switch which fileset we're using if it's a particle dataset + if (dataSetName == "particle_density") { + file = _testParticlesFileVec; + dataSetName = "density"; + } else if (file.empty()) { + file = _testHydroFieldsFileVec; + } + + // Get the size of each dimension. 
Check if the field is a magnetic + // field or not to make sure we're retreiving the right dimensions + H5::Attribute dimensions = file[0].openAttribute("dims"); + dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); + + if (dataSetName == "magnetic_x") { + testDims.at(0)++; + } else if (dataSetName == "magnetic_y") { + testDims.at(1)++; + } else if (dataSetName == "magnetic_z") { + testDims.at(2)++; + } + + // Allocate the vector and initialize to a quiet NaN to make failed writes clearer + std::vector testData(testDims[0] * testDims[1] * testDims[2], std::numeric_limits::quiet_NaN()); + + for (size_t rank = 0; rank < numMpiRanks; rank++) { + // Open the dataset + H5::DataSet const testDataSet = file[rank].openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace const testDataSpace = testDataSet.getSpace(); + + std::vector tempDims{1, 1, 1}; + int numTestDims = testDataSpace.getSimpleExtentDims(tempDims.data()); + + // Allocate vectors, Note that I'm casting everything to double. 
Some + // of the vectors are ints in the HDF5 file and if the casting + // becomes an issue we can fix it later + std::vector tempArr(tempDims[0] * tempDims[1] * tempDims[2]); + + // Read in data + testDataSet.read(tempArr.data(), H5::PredType::NATIVE_DOUBLE); + + // Get offset + std::vector offset(3, 1); + H5::Attribute offsetAttr = file[rank].openAttribute("offset"); + offsetAttr.read(H5::PredType::NATIVE_INT, offset.data()); + + // Get dims_local + std::vector dimsLocal(3, 1); + H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local"); + dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); + + if (dataSetName == "magnetic_x") { + dimsLocal.at(0)++; + } else if (dataSetName == "magnetic_y") { + dimsLocal.at(1)++; + } else if (dataSetName == "magnetic_z") { + dimsLocal.at(2)++; } -} -// ============================================================================= - -// ============================================================================= -void systemTest::SystemTestRunner::_checkNumTimeSteps() -{ - int fiducialNSteps, testNSteps; - H5::Attribute tStepAttr; - if (_hydroDataExists) - { - tStepAttr = _testHydroFieldsFileVec[0].openAttribute("n_step"); - } - else if (_particleDataExists) - { - tStepAttr = _testParticlesFileVec[0].openAttribute("n_step"); - } - else - { - std::string errMessage = "Error: Both hydro and particle data are turned off."; - throw std::invalid_argument(errMessage); - } + // Now we add the data to the larger vector + size_t localIndex = 0; + for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++) { + for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++) { + for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++) { + // Compute the location to put the next element + size_t overallIndex = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - tStepAttr.read(H5::PredType::NATIVE_INT, &testNSteps); + // Perform copy + testData[overallIndex] = tempArr[localIndex]; - if (_fiducialFileExists) - 
{ - tStepAttr = _fiducialFile.openAttribute("n_step"); - tStepAttr.read(H5::PredType::NATIVE_INT, &fiducialNSteps); - } - else - { - fiducialNSteps = _numFiducialTimeSteps; + // Increment local index + localIndex++; + } + } } + } - EXPECT_EQ(fiducialNSteps, testNSteps) - << "The number of time steps is not equal"; -}; + // Return the entire, concatenated, dataset + return testData; +} // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::loadTestFieldData( - std::string dataSetName, - std::vector &testDims) +std::vector system_test::SystemTestRunner::_loadTestParticleData(std::string const &dataSetName) { - // Get the file we're using - std::vector file; - if (dataSetName == "particle_density") - { - file = _testParticlesFileVec; - dataSetName = "density"; - } - else - { - file = _testHydroFieldsFileVec; - } - - // Get the size of each dimension - H5::Attribute dimensions = file[0].openAttribute("dims"); - dimensions.read(H5::PredType::NATIVE_ULONG, testDims.data()); - - // Allocate the vector - std::vector testData(testDims[0] * testDims[1] * testDims[2]); - - for (size_t rank = 0; rank < numMpiRanks; rank++) - { - // Open the dataset - H5::DataSet const testDataSet = file[rank].openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace const testDataSpace = testDataSet.getSpace(); - - std::vector tempDims{1,1,1}; - int numTestDims = testDataSpace.getSimpleExtentDims(tempDims.data()); - - // Allocate vectors, Note that I'm casting everything to double. 
Some - // of the vectors are ints in the HDF5 file and if the casting - // becomes an issue we can fix it later - std::vector tempArr(tempDims[0] * tempDims[1] * tempDims[2]); - - // Read in data - testDataSet.read(tempArr.data(), H5::PredType::NATIVE_DOUBLE); - - // Get offset - std::vector offset(3,1); - H5::Attribute offsetAttr = file[rank].openAttribute("offset"); - offsetAttr.read(H5::PredType::NATIVE_INT, offset.data()); - - // Get dims_local - std::vector dimsLocal(3,1); - H5::Attribute dimsLocalAttr = file[rank].openAttribute("dims_local"); - dimsLocalAttr.read(H5::PredType::NATIVE_INT, dimsLocal.data()); - - // Now we add the data to the larger vector - size_t localIndex = 0; - for (size_t i = offset[0]; i < offset[0] + dimsLocal[0]; i++) - { - for (size_t j = offset[1]; j < offset[1] + dimsLocal[1]; j++) - { - for (size_t k = offset[2]; k < offset[2] + dimsLocal[2]; k++) - { - // Compute the location to put the next element - size_t overallIndex = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - // Perform copy - testData[overallIndex] = tempArr[localIndex]; - - // Increment local index - localIndex++; - } - } - } + // Determine the total number of particles + if (_testTotalNumParticles == 0) { + for (auto const &file : _testParticlesFileVec) { + // Open the dataset + H5::DataSet const dataSet = file.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace dataSpace = dataSet.getSpace(); + + // Get the number of elements and increase the total count + size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); + _testTotalNumParticles += localNumParticles; } + } + + // Allocate the vectors + std::vector unsortedTestData; + std::vector testData(_testTotalNumParticles); + + // Load in the data + for (size_t rank = 0; rank < numMpiRanks; rank++) { + // Open the dataset + H5::DataSet const testDataSet = _testParticlesFileVec[rank].openDataSet(dataSetName); + + // Determine dataset size/shape and 
check that it's correct + H5::DataSpace const testDataSpace = testDataSet.getSpace(); + + size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); + std::vector tempVector(localNumParticles); + + // Read in data + testDataSet.read(tempVector.data(), H5::PredType::NATIVE_DOUBLE); + unsortedTestData.insert(unsortedTestData.end(), tempVector.begin(), tempVector.end()); + } + + // Generate the sorting vector if it's not already generated + std::vector tempSortedIndices; + if (dataSetName == "particle_IDs") { + tempSortedIndices.resize(_testTotalNumParticles); + std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); + std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), + [&](size_t A, size_t B) -> bool { return unsortedTestData[A] < unsortedTestData[B]; }); + } + std::vector static const sortedIndices = tempSortedIndices; + + // Sort the vector + for (size_t i = 0; i < _testTotalNumParticles; i++) { + testData.at(i) = unsortedTestData.at(sortedIndices.at(i)); + } + + // Return the entire dataset fully concatenated and sorted + return testData; +} +// ============================================================================= - // Return the entire, concatenated, dataset - return testData; +// ============================================================================= +std::vector system_test::SystemTestRunner::_loadFiducialFieldData(std::string const &dataSetName) +{ + if (_fiducialFileExists and (_fiducialDataSets.find(dataSetName) == _fiducialDataSets.end())) { + // Open the dataset + H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace fiducialDataSpace = fiducialDataSet.getSpace(); + + std::vector fidDims{1, 1, 1}; + fiducialDataSpace.getSimpleExtentDims(fidDims.data()); + + // Allocate vectors, Note that I'm casting everything to double. 
Some + // of the vectors are ints in the HDF5 file and if the casting + // becomes an issue we can fix it later + std::vector fiducialData(fidDims[0] * fidDims[1] * fidDims[2]); + + // Read in data + fiducialDataSet.read(fiducialData.data(), H5::PredType::NATIVE_DOUBLE); + return fiducialData; + } else { + return _fiducialDataSets[dataSetName]; + } } // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadTestParticleData( - std::string const &dataSetName) +std::vector system_test::SystemTestRunner::_loadFiducialParticleData(std::string const &dataSetName) { + if (_fiducialFileExists) { // Determine the total number of particles - if (_testTotalNumParticles == 0) - { - for (auto file: _testParticlesFileVec) - { - // Open the dataset - H5::DataSet const dataSet = file.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace dataSpace = dataSet.getSpace(); - - // Get the number of elements and increase the total count - size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); - _testTotalNumParticles += localNumParticles; - } + if (_fiducialTotalNumParticles == 0) { + // Open the dataset + H5::DataSet const dataSet = _fiducialFile.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace dataSpace = dataSet.getSpace(); + + // Get the number of elements and increase the total count + size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); + _fiducialTotalNumParticles += localNumParticles; } // Allocate the vectors - std::vector unsortedTestData; - std::vector testData(_testTotalNumParticles); + std::vector unsortedFiducialData(_fiducialTotalNumParticles); + std::vector fiducialData(_fiducialTotalNumParticles); // Load in the data - for (size_t rank = 0; rank < numMpiRanks; rank++) - { - // Open the dataset - 
H5::DataSet const testDataSet = _testParticlesFileVec[rank].openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace const testDataSpace = testDataSet.getSpace(); - - size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); - std::vector tempVector(localNumParticles); - - // Read in data - testDataSet.read(tempVector.data(), - H5::PredType::NATIVE_DOUBLE); - unsortedTestData.insert(unsortedTestData.end(), - tempVector.begin(), - tempVector.end() ); - } + // Open the dataset + H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); + + // Determine dataset size/shape and check that it's correct + H5::DataSpace const testDataSpace = fiducialDataSet.getSpace(); + + size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); + + // Read in data + fiducialDataSet.read(unsortedFiducialData.data(), H5::PredType::NATIVE_DOUBLE); // Generate the sorting vector if it's not already generated std::vector tempSortedIndices; - if (dataSetName == "particle_IDs") - { - tempSortedIndices.resize(_testTotalNumParticles); - std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); - std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), - [&](size_t A, size_t B) -> bool { - return unsortedTestData[A] < unsortedTestData[B]; - }); + if (dataSetName == "particle_IDs") { + tempSortedIndices.resize(_fiducialTotalNumParticles); + std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); + std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), + [&](size_t A, size_t B) -> bool { return unsortedFiducialData.at(A) < unsortedFiducialData.at(B); }); } - std::vector static const sortedIndices = tempSortedIndices; + std::vector const static sortedIndices = tempSortedIndices; // Sort the vector - for (size_t i = 0; i < _testTotalNumParticles; i++) - { - testData.at(i) = unsortedTestData.at(sortedIndices.at(i)); + for (size_t i = 0; i < _fiducialTotalNumParticles; i++) { + 
fiducialData.at(i) = unsortedFiducialData.at(sortedIndices.at(i)); } // Return the entire dataset fully concatenated and sorted - return testData; + return fiducialData; + } else { + return _fiducialDataSets[dataSetName]; + } } // ============================================================================= // ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialFieldData( - std::string const &dataSetName) +std::vector system_test::SystemTestRunner::_findDataSetNames(H5::H5File const &inputFile) { - if (_fiducialFileExists) - { - // Open the dataset - H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace fiducialDataSpace = fiducialDataSet.getSpace(); - - std::vector fidDims{1,1,1}; - fiducialDataSpace.getSimpleExtentDims(fidDims.data()); + std::vector outputVector; - // Allocate vectors, Note that I'm casting everything to double. 
Some - // of the vectors are ints in the HDF5 file and if the casting - // becomes an issue we can fix it later - std::vector fiducialData(fidDims[0] * fidDims[1] * fidDims[2]); - - // Read in data - fiducialDataSet.read(fiducialData.data(), H5::PredType::NATIVE_DOUBLE); - return fiducialData; - } - else - { - return _fiducialDataSets[dataSetName]; - } -} -// ============================================================================= - -// ============================================================================= -std::vector systemTest::SystemTestRunner::_loadFiducialParticleData( - std::string const &dataSetName) -{ - if (_fiducialFileExists) - { - // Determine the total number of particles - if (_fiducialTotalNumParticles == 0) - { - // Open the dataset - H5::DataSet const dataSet = _fiducialFile.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace dataSpace = dataSet.getSpace(); - - // Get the number of elements and increase the total count - size_t localNumParticles = dataSpace.getSimpleExtentNpoints(); - _fiducialTotalNumParticles += localNumParticles; - } - - // Allocate the vectors - std::vector unsortedFiducialData(_fiducialTotalNumParticles); - std::vector fiducialData(_fiducialTotalNumParticles); - - // Load in the data - // Open the dataset - H5::DataSet const fiducialDataSet = _fiducialFile.openDataSet(dataSetName); - - // Determine dataset size/shape and check that it's correct - H5::DataSpace const testDataSpace = fiducialDataSet.getSpace(); - - size_t localNumParticles = testDataSpace.getSimpleExtentNpoints(); - - // Read in data - fiducialDataSet.read(unsortedFiducialData.data(), - H5::PredType::NATIVE_DOUBLE); - - // Generate the sorting vector if it's not already generated - std::vector tempSortedIndices; - if (dataSetName == "particle_IDs") - { - tempSortedIndices.resize(_fiducialTotalNumParticles); - std::iota(tempSortedIndices.begin(), tempSortedIndices.end(), 0); - 
std::sort(tempSortedIndices.begin(), tempSortedIndices.end(), - [&](size_t A, size_t B) -> bool { - return unsortedFiducialData.at(A) < unsortedFiducialData.at(B); - }); - } - std::vector const static sortedIndices = tempSortedIndices; - - // Sort the vector - for (size_t i = 0; i < _fiducialTotalNumParticles; i++) - { - fiducialData.at(i) = unsortedFiducialData.at(sortedIndices.at(i)); - } - - // Return the entire dataset fully concatenated and sorted - return fiducialData; - } - else - { - return _fiducialDataSets[dataSetName]; - } -} -// ============================================================================= - -// ============================================================================= -std::vector systemTest::SystemTestRunner::_findDataSetNames( - H5::H5File const &inputFile) -{ - std::vector outputVector; - - for (size_t dataSetID = 0; - dataSetID < inputFile.getNumObjs(); - dataSetID++) - { - outputVector.push_back(inputFile.getObjnameByIdx(dataSetID)); - } - return outputVector; + for (size_t dataSetID = 0; dataSetID < inputFile.getNumObjs(); dataSetID++) { + outputVector.push_back(inputFile.getObjnameByIdx(dataSetID)); + } + return outputVector; }; // ============================================================================= diff --git a/src/system_tests/system_tester.h b/src/system_tests/system_tester.h index 6d5aa1925..c0612806e 100644 --- a/src/system_tests/system_tester.h +++ b/src/system_tests/system_tester.h @@ -9,10 +9,10 @@ #pragma once // STL includes -#include -#include #include +#include #include +#include // External Libraries and Headers #include @@ -21,355 +21,358 @@ * \brief This namespace contains one class, SystemTestRunner, whose * purpose is to (as you might expect) run system tests. */ -namespace systemTest +namespace system_test { - /*! - * \brief Runs a system test using the full test name to determine all - * paths. - * - * \details By default this class uses the full name of your test, i.e. 
the test - * suite name plus the test name, along with some global variables to - * determine the paths to all the input files. The global variables are all - * set in main_tests.cpp and are the path to the Cholla directory, the make - * type being used, and the machine being run on. If the main function does - * get those it will throw an error so that error checking is not done here. - * - * To run a system test simply name the test according to convetion and put - * the input file in the `cholla/src/system_tests/input_files` directory and - * the data file in the `cholla/src/system_tests/fiducial_data` directory. - * Then name the files `testSuiteName_testCaseName` with the `.txt` or `.h5` - * extension respectively. If this class can't find the files it will - * throw an error with the path it searched. All the output files from the - * test are deposited in `cholla/bin/testSuiteName_testCaseName` - * - * More advanced functionality is provided with a series of member functions - * that allow you to programmatically generate the fiducial HDF5 file, - * choose which datasets to compare, whether or not to compare the number of - * time steps, etc. - * - */ - class SystemTestRunner; -} // namespace systemTest - -class systemTest::SystemTestRunner +/*! + * \brief Runs a system test using the full test name to determine all + * paths. + * + * \details By default this class uses the full name of your test, i.e. the test + * suite name plus the test name, along with some global variables to + * determine the paths to all the input files. The global variables are all + * set in main_tests.cpp and are the path to the Cholla directory, the make + * type being used, and the machine being run on. If the main function does + * not get those it will throw an error so that error checking is not done here.
+ * + * To run a system test simply name the test according to convention and put + * the input file in the `cholla/src/system_tests/input_files` directory and + * the data file in the `cholla/src/system_tests/fiducial_data` directory. + * Then name the files `testSuiteName_testCaseName` with the `.txt` or `.h5` + * extension respectively. If this class can't find the files it will + * throw an error with the path it searched. All the output files from the + * test are deposited in `cholla/bin/testSuiteName_testCaseName` + * + * More advanced functionality is provided with a series of member functions + * that allow you to programmatically generate the fiducial HDF5 file, + * choose which datasets to compare, whether or not to compare the number of + * time steps, etc. + * + */ +class SystemTestRunner; +} // namespace system_test + +class system_test::SystemTestRunner { -public: - /// The number of MPI ranks, defaults to 1 - size_t numMpiRanks = 1; - - /*! - * \brief Set the parameters that Cholla launches with, potentially entirely - * replacing the need for a settings file. A string of the launch parameters - * that will override the values in the settings file (if given). Any of - * Cholla's standard launch paramters work except `outdir` as that is - * reserved for usage in the systemTest::SystemTestRunner.runTest() method - */ - std::string chollaLaunchParams; - - /*! - * \brief Run the system test that has been set up - * - */ - void runTest(); - - void launchCholla(); - - void openHydroTestData(); - /*! - * \brief Get the Cholla Path object - * - * \return std::string The path to the Cholla executable - */ - std::string getChollaPath(){return _chollaPath;}; - - /*! - * \brief Get the Cholla Settings File Path object - * - * \return std::string The full filename/path to the settings file used to - * initialize Cholla - */ - std::string getChollaSettingsFilePath(){return _chollaSettingsPath;}; - - /*!
- * \brief Get the Output Directory object - * - * \return std::string The path to the directory where all the output is - * stored - */ - std::string getOutputDirectory(){return _outputDirectory;}; - - /*! - * \brief Get the Console Output Path object - * - * \return std::string The full filename/path to the file where all the - * console output is stored - */ - std::string getConsoleOutputPath(){return _consoleOutputPath;}; - - /*! - * \brief Get the Fiducial File object - * - * \return H5::H5File - */ - H5::H5File getFiducialFile(){return _fiducialFile;}; - - /*! - * \brief Get the Test File object - * - * \param index The MPI rank of the file you want to return. Defaults to 0 - * \return H5::H5File - */ - H5::H5File getTestFile(size_t const &i = 0){return _testHydroFieldsFileVec[i];}; - - /*! - * \brief Get the vector of datasets that will be tested - * - * \return std::vector - */ - std::vector getDataSetsToTest(){return _fiducialDataSetNames;}; - - /*! - * \brief Choose which datasets to test. By default it tests all the - * datasets in the fiducial data. A warning will be thrown if not all the - * datasets are being tested. Note that any call to this function will - * overwrite the default values - * - * \param[in] dataSetNames A std::vector of std::strings where each entry is - * a dataset name. Note that it is case sensitive - */ - void setDataSetsToTest(std::vector const &dataSetNames) - {_fiducialDataSetNames = dataSetNames;}; - - /*! - * \brief Set the Compare Num Time Steps object - * - * \param[in] compare Defaults to `true`. If false then the number of timesteps - * is not compared. - */ - void setCompareNumTimeSteps(bool const &compare) - {_compareNumTimeSteps = compare;}; - - /*! 
- * \brief Set or add a fiducial dataset - * - * \param[in] fieldName The name of the field to be added - * \param[in] dataArr The std::vector for the data vector to be added as - * a data set - */ - void setFiducialData(std::string const &fieldName, - std::vector const &dataVec); - - /*! - * \brief Set the Fiducial Num Time Steps object - * - * \param numTimeSteps The number of time steps in the fiducial data - */ - void setFiducialNumTimeSteps(int const &numTimeSteps) - {_numFiducialTimeSteps = numTimeSteps;}; - - /*! - * \brief Generate an vector of the specified size populated by the specified - * value. - * - * \param[in] value The value to populate the vector with - * \param[in] nx (optional) The size of the field in the x-direction. - * Defaults to 1 - * \param[in] ny (optional) The size of the field in the y-direction. - * Defaults to 1 - * \param[in] nz (optional) The size of the field in the z-direction. - * Defaults to 1 - * \return std::vector A 1-dimensional std::vector of the required - * size containing the data. - */ - std::vector generateConstantData(double const &value, - size_t const &nx=1, - size_t const &ny=1, - size_t const &nz=1); - - /*! - * \brief Load the test data for physical fields from the HDF5 file(s). If - * there is more than one HDF5 file then it concatenates the contents into a - * single vector. Particle data is handeled with _loadTestParticleData - * - * \param[in] dataSetName The name of the dataset to get - * \param[out] testDims An vector with the length of each dimension in it - * \return std::vector A vector containing the data - */ - std::vector loadTestFieldData(std::string dataSetName, - std::vector &testDims); - - /*! - * \brief Generate a std::vector of the specified size populated by a sine - * wave. 
The equation used to generate the wave is: - * - * wave = offset + amplitude * sin(kx*xIndex + ky*yIndex + kz*zIndex + phase) - * - * \param[in] offset Flat offset from zero - * \param[in] amplitude Amplitude of the wave - * \param[in] kx The x component of the wave vector in pixel units - * \param[in] ky The y component of the wave vector in pixel units - * \param[in] kz The z component of the wave vector in pixel units - * \param[in] phase Phase of the sine wave - * \param[in] nx (optional) The size of the field in the x-direction. - * Defaults to 1 - * \param[in] ny (optional) The size of the field in the y-direction. - * Defaults to 1 - * \param[in] nz (optional) The size of the field in the z-direction. - * Defaults to 1 - * \return std::vector A 1-dimensional std::vector of the required - * size containing the data. - */ - std::vector generateSineData(double const &offset, - double const &litude, - double const &kx, - double const &ky, - double const &kz, - double const &phase, - size_t const &nx=1, - size_t const &ny=1, - size_t const &nz=1); - - // Constructor and Destructor - /*! - * \brief Construct a new System Test Runner object - * - * \param[in] particleData Is there particle data? - * \param[in] hydroData Is there hydro data? - * \param[in] useFiducialFile Indicate if you're using a HDF5 file or will - * generate your own. Defaults to `true`, i.e. using an HDF5 file. Set to - * `false` to generate your own - * \param[in] useSettingsFile Indicate if you're using a settings file. If - * `true` then the settings file is automatically found based on the naming - * convention. 
If false then the user MUST provide all the required settings - * with the SystemTestRunner::setChollaLaunchParams method - */ - SystemTestRunner(bool const &particleData=false, - bool const &hydroData=true, - bool const &useFiducialFile=true, - bool const &useSettingsFile=true); - ~SystemTestRunner(); - -private: - /// The fiducial dat file - H5::H5File _fiducialFile; - /// The test hydro field data files - std::vector _testHydroFieldsFileVec; - /// The test particle data files - std::vector _testParticlesFileVec; - - /// The path to the Cholla executable - std::string _chollaPath; - /// The full name of the test with an underscore instead of a period. This - /// is the name of many of the input files, the output directory, etc - std::string _fullTestFileName; - /// The path to the Cholla settings file - std::string _chollaSettingsPath; - /// The path to the fiducial data file - std::string _fiducialFilePath; - /// The path to the output directory - std::string _outputDirectory; - /// The path and name of the console output file - std::string _consoleOutputPath; - - /// A list of all the data set names in the fiducial data file - std::vector _fiducialDataSetNames; - /// A list of all the data set names in the test data file - std::vector _testDataSetNames; - - /// The number of fiducial time steps - int _numFiducialTimeSteps; - /// Map of fiducial data sets if we're not using a fiducial file - std::unordered_map> _fiducialDataSets; - - /// The test particle IDs - std::vector _testParticleIDs; - /// The total number of particles in the test dataset - size_t _testTotalNumParticles=0; - /// The fiducial particle IDs - std::vector _fiducialParticleIDs; - /// The total number of particles in the fiducial dataset - size_t _fiducialTotalNumParticles=0; - - /// Flag to indicate if a fiducial HDF5 data file is being used or a - /// programmatically generated H5File object. 
`true` = use a file, `false` = - /// use generated H5File object - bool _fiducialFileExists = false; - /// Flag to choose whether or not to compare the number of time steps - bool _compareNumTimeSteps = true; - - /// Flag to indicate whether or not there is hydro field data - /// If true then hydro data files are searched for and will be compared to - /// fiducial values. If false then it is assumed that the test produces no - /// hydro field data - bool _hydroDataExists = true; - /// Flag to indicate whether or not there is particle data - /// If true then particle data files are searched for and will be compared - /// to fiducial values. If false then it is assumed that the test produces - /// no particle data - bool _particleDataExists = false; - - - /*! - * \brief Move a file. Throws an exception if the file does not exist. - * or if the move was unsuccessful - * - * \param[in] sourcePath The path the the file to be moved - * \param[in] destinationDirectory The path to the director the file should - * be moved to - */ - void _safeMove(std::string const &sourcePath, - std::string const &destinationDirectory); - - /*! - * \brief Checks if the given file exists. Throws an exception if the - * file does not exist. - * - * \param[in] filePath The path to the file to check for - */ - void _checkFileExists(std::string const &filePath); - - /*! - * \brief Using GTest assertions to check if the fiducial and test data have - * the same number of time steps - * - */ - void _checkNumTimeSteps(); - - /*! - * \brief Load the test data for particles from the HDF5 file(s). If - * there is more than one HDF5 file then it concatenates the contents into a - * single vector. Field data is handeled with _loadTestFieldData - * - * \param[in] dataSetName The name of the dataset to get - * \return std::vector A vector containing the data - */ - std::vector _loadTestParticleData(std::string const &dataSetName); - - /*! 
- * \brief Load the test data for physical fields from the HDF5 file or - * returns the user set vector. - * Particle data is handeled with _loadFiducialParticleData. - * - * \param[in] dataSetName The name of the dataset to get - * \return std::vector A vector with the contents of the data set - */ - std::vector _loadFiducialFieldData(std::string const &dataSetName); - - /*! - * \brief Load the fiducial data for particles from the HDF5 file or return - * the user set vector. Field data is handeled with _loadFiducialFieldData - * - * \param[in] dataSetName The name of the dataset to get - * \return std::vector A vector containing the data - */ - std::vector _loadFiducialParticleData(std::string const &dataSetName); - - - /*! - * \brief Return a vector of all the dataset names in the given HDF5 file - * - * \param[in] inputFile The HDF5 file to find names in - * \return std::vector - */ - std::vector _findDataSetNames(H5::H5File const &inputFile); -}; // End of class systemTest::SystemTestRunner + public: + /// The number of MPI ranks, defaults to 1 + size_t numMpiRanks = 1; + + /*! + * \brief Set the parameters that Cholla launches with, potentially entirely + * replacing the need for a settings file. A string of the launch parameters + * that will override the values in the settings file (if given). Any of + * Cholla's standard launch parameters work except `outdir` as that is + * reserved for usage in the system_test::SystemTestRunner.runTest() method + */ + std::string chollaLaunchParams; + + /*! + * \brief Run the system test that has been set up + * + */ + void runTest(bool const &compute_L2_norm_only = false, double const &maxAllowedL1Error = 0.0, + double const &maxAllowedError = 0.0); + + /*! + * \brief Compute the L1 error for each field compared to the initial + * conditions.
Doesn't work with particle data + * + * \param[in] maxAllowedL1Error The maximum allowed L1 error for this test + * \param[in] maxAllowedError The maximum allowed error for any value in the test + * + */ + void runL1ErrorTest(double const &maxAllowedL1Error, double const &maxAllowedError = 1E-7); + + /*! + * \brief Launch Cholla as it is set up + * + */ + void launchCholla(); + + void openHydroTestData(); + + /*! + * \brief Get the Cholla Path object + * + * \return std::string The path to the Cholla executable + */ + std::string getChollaPath() { return _chollaPath; }; + + /*! + * \brief Get the Cholla Settings File Path object + * + * \return std::string The full filename/path to the settings file used to + * initialize Cholla + */ + std::string getChollaSettingsFilePath() { return _chollaSettingsPath; }; + + /*! + * \brief Get the L2Norm + * + * \return double The L2Norm of the last run test + */ + double getL2Norm() { return L2Norm_; }; + + /*! + * \brief Get the Output Directory object + * + * \return std::string The path to the directory where all the output is + * stored + */ + std::string getOutputDirectory() { return _outputDirectory; }; + + /*! + * \brief Get the Console Output Path object + * + * \return std::string The full filename/path to the file where all the + * console output is stored + */ + std::string getConsoleOutputPath() { return _consoleOutputPath; }; + + /*! + * \brief Get the Fiducial File object + * + * \return H5::H5File + */ + H5::H5File getFiducialFile() { return _fiducialFile; }; + + /*! + * \brief Get the Test File object + * + * \param i The MPI rank of the file you want to return. Defaults to 0 + * \return H5::H5File + */ + H5::H5File getTestFile(size_t const &i = 0) { return _testHydroFieldsFileVec[i]; }; + + /*! + * \brief Get the vector of datasets that will be tested + * + * \return std::vector + */ + std::vector getDataSetsToTest() { return _fiducialDataSetNames; }; + + /*!
+ * \brief Set the Fixed Epsilon value + * + * \param[in] newVal The new value of fixed epsilon + */ + void setFixedEpsilon(double const &newVal) { _fixedEpsilon = newVal; }; + + /*! + * \brief Choose which datasets to test. By default it tests all the + * datasets in the fiducial data. A warning will be thrown if not all the + * datasets are being tested. Note that any call to this function will + * overwrite the default values + * + * \param[in] dataSetNames A std::vector of std::strings where each entry is + * a dataset name. Note that it is case sensitive + */ + void setDataSetsToTest(std::vector const &dataSetNames) { _fiducialDataSetNames = dataSetNames; }; + + /*! + * \brief Set the Compare Num Time Steps object + * + * \param[in] compare Defaults to `true`. If false then the number of + * timesteps is not compared. + */ + void setCompareNumTimeSteps(bool const &compare) { _compareNumTimeSteps = compare; }; + + /*! + * \brief Set or add a fiducial dataset + * + * \param[in] fieldName The name of the field to be added + * \param[in] dataVec The std::vector for the data vector to be added as + * a data set + */ + void setFiducialData(std::string const &fieldName, std::vector const &dataVec); + + /*! + * \brief Set the Fiducial Num Time Steps object + * + * \param numTimeSteps The number of time steps in the fiducial data + */ + void setFiducialNumTimeSteps(int const &numTimeSteps) { _numFiducialTimeSteps = numTimeSteps; }; + + /*! + * \brief Generate a vector of the specified size populated by the specified + * value. + * + * \param[in] value The value to populate the vector with + * \param[in] nx (optional) The size of the field in the x-direction. + * Defaults to 1 + * \param[in] ny (optional) The size of the field in the y-direction. + * Defaults to 1 + * \param[in] nz (optional) The size of the field in the z-direction. + * Defaults to 1 + * \return std::vector A 1-dimensional std::vector of the required + * size containing the data.
+ */ + std::vector generateConstantData(double const &value, size_t const &nx = 1, size_t const &ny = 1, + size_t const &nz = 1); + + /*! + * \brief Load the test data for physical fields from the HDF5 file(s). If + * there is more than one HDF5 file then it concatenates the contents into a + * single vector. Particle data is handled with _loadTestParticleData + * + * \param[in] dataSetName The name of the dataset to get + * \param[out] testDims A vector with the length of each dimension in it + * \param[in] file (optional) The vector of HDF5 files to load + * \return std::vector A vector containing the data + */ + std::vector loadTestFieldData(std::string dataSetName, std::vector &testDims, + std::vector file = {}); + + /*! + * \brief Generate a std::vector of the specified size populated by a sine + * wave. The equation used to generate the wave is: + * + * wave = offset + amplitude * sin(kx*xIndex + ky*yIndex + kz*zIndex + phase) + * + * \param[in] offset Flat offset from zero + * \param[in] amplitude Amplitude of the wave + * \param[in] kx The x component of the wave vector in pixel units + * \param[in] ky The y component of the wave vector in pixel units + * \param[in] kz The z component of the wave vector in pixel units + * \param[in] phase Phase of the sine wave + * \param[in] nx (optional) The size of the field in the x-direction. + * Defaults to 1 + * \param[in] ny (optional) The size of the field in the y-direction. + * Defaults to 1 + * \param[in] nz (optional) The size of the field in the z-direction. + * Defaults to 1 + * \return std::vector A 1-dimensional std::vector of the required + * size containing the data. + */ + std::vector generateSineData(double const &offset, double const &litude, double const &kx, + double const &ky, double const &kz, double const &phase, size_t const &nx = 1, + size_t const &ny = 1, size_t const &nz = 1); + + // Constructor and Destructor + /*!
+ * \brief Construct a new System Test Runner object + * + * \param[in] particleData Is there particle data? + * \param[in] hydroData Is there hydro data? + * \param[in] useFiducialFile Indicate if you're using a HDF5 file or will + * generate your own. Defaults to `true`, i.e. using an HDF5 file. Set to + * `false` to generate your own + * \param[in] useSettingsFile Indicate if you're using a settings file. If + * `true` then the settings file is automatically found based on the naming + * convention. If false then the user MUST provide all the required settings + * with the SystemTestRunner::chollaLaunchParams member variable + */ + SystemTestRunner(bool const &particleData = false, bool const &hydroData = true, bool const &useFiducialFile = true, + bool const &useSettingsFile = true); + ~SystemTestRunner(); + + private: + /// The fiducial dat file + H5::H5File _fiducialFile; + /// The test hydro field data files + std::vector _testHydroFieldsFileVec; + /// The test particle data files + std::vector _testParticlesFileVec; + + /// The path to the Cholla executable + std::string _chollaPath; + /// The full name of the test with an underscore instead of a period. 
This + /// is the name of many of the input files, the output directory, etc + std::string _fullTestFileName; + /// The path to the Cholla settings file + std::string _chollaSettingsPath; + /// The path to the fiducial data file + std::string _fiducialFilePath; + /// The path to the output directory + std::string _outputDirectory; + /// The path and name of the console output file + std::string _consoleOutputPath; + + /// A list of all the data set names in the fiducial data file + std::vector _fiducialDataSetNames; + /// A list of all the data set names in the test data file + std::vector _testDataSetNames; + + /// The number of fiducial time steps + int _numFiducialTimeSteps; + /// Map of fiducial data sets if we're not using a fiducial file + std::unordered_map> _fiducialDataSets; + + /// The test particle IDs + std::vector _testParticleIDs; + /// The total number of particles in the test dataset + size_t _testTotalNumParticles = 0; + /// The fiducial particle IDs + std::vector _fiducialParticleIDs; + /// The total number of particles in the fiducial dataset + size_t _fiducialTotalNumParticles = 0; + + /// Fixed epsilon is changed from the default since AMD/Clang + /// appear to differ from NVIDIA/GCC/XL by roughly 1E-12 + double _fixedEpsilon = 5.0E-12; + + /// The L2 norm of the error vector + double L2Norm_; + + /// Flag to indicate if a fiducial HDF5 data file is being used or a + /// programmatically generated H5File object. `true` = use a file, `false` = + /// use generated H5File object + bool _fiducialFileExists = false; + /// Flag to choose whether or not to compare the number of time steps + bool _compareNumTimeSteps = true; + + /// Flag to indicate whether or not there is hydro field data + /// If true then hydro data files are searched for and will be compared to + /// fiducial values. 
If false then it is assumed that the test produces no + /// hydro field data + bool _hydroDataExists = true; + /// Flag to indicate whether or not there is particle data + /// If true then particle data files are searched for and will be compared + /// to fiducial values. If false then it is assumed that the test produces + /// no particle data + bool _particleDataExists = false; + + /*! + * \brief Using GTest assertions to check if the fiducial and test data have + * the same number of time steps + * + */ + void _checkNumTimeSteps(); + + /*! + * \brief Load the test data for particles from the HDF5 file(s). If + * there is more than one HDF5 file then it concatenates the contents into a + * single vector. Field data is handeled with _loadTestFieldData + * + * \param[in] dataSetName The name of the dataset to get + * \return std::vector A vector containing the data + */ + std::vector _loadTestParticleData(std::string const &dataSetName); + + /*! + * \brief Load the test data for physical fields from the HDF5 file or + * returns the user set vector. + * Particle data is handeled with _loadFiducialParticleData. + * + * \param[in] dataSetName The name of the dataset to get + * \return std::vector A vector with the contents of the data set + */ + std::vector _loadFiducialFieldData(std::string const &dataSetName); + + /*! + * \brief Load the fiducial data for particles from the HDF5 file or return + * the user set vector. Field data is handeled with _loadFiducialFieldData + * + * \param[in] dataSetName The name of the dataset to get + * \return std::vector A vector containing the data + */ + std::vector _loadFiducialParticleData(std::string const &dataSetName); + + /*! 
+ * \brief Return a vector of all the dataset names in the given HDF5 file + * + * \param[in] inputFile The HDF5 file to find names in + * \return std::vector + */ + std::vector _findDataSetNames(H5::H5File const &inputFile); +}; // End of class system_test::SystemTestRunner diff --git a/src/utils/DeviceVector.h b/src/utils/DeviceVector.h index 422f3d151..db10a09b4 100644 --- a/src/utils/DeviceVector.h +++ b/src/utils/DeviceVector.h @@ -1,5 +1,5 @@ /*! - * \file device_vector.h + * \file DeviceVector.h * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Contains the declartion and implementation of the DeviceVector * class. Note that since this is a templated class the implementation must be @@ -10,10 +10,11 @@ #pragma once // STL Includes -#include -#include -#include #include +#include +#include +#include +#include // External Includes @@ -27,305 +28,305 @@ // ============================================================================= namespace cuda_utilities { - /*! - * \brief A templatized class to encapsulate a device global memory pointer - * in a std::vector like interface complete with most of the usual methods. - * This class is intended to be used only in host code and does not work - * device side; Passing the pointer to a kernel can be done with the - * `data()` method. This class works for any device side pointer, scalar or - * array valued. - * - * \tparam T Any serialized type where `sizeof(T)` returns correct results - * should work but non-primitive types have not been tested. - */ - template - class DeviceVector - { - public: - /*! - * \brief Construct a new Device Vector object by calling the - * `_allocate` private method - * - * \param[in] size The number of elements desired in the array. Can be - * any positive integer. - */ - DeviceVector(size_t const size) {_allocate(size);} - - /*! - * \brief Destroy the Device Vector object by calling the `_deAllocate` - * private method - * - */ - ~DeviceVector() {_deAllocate();} - - /*! 
- * \brief Get the raw device pointer - * - * \return T* The pointer for the array in global memory - */ - T* data() {return _ptr;} - - /*! - * \brief Get the number of elements in the array. - * - * \return size_t The number of elements in the array - */ - size_t size() {return _size;} - - /*! - * \brief Overload the [] operator to return a value from device memory. - * This method performs a cudaMemcpy to copy the desired element to the - * host then returns it. Unlike the `at()` method this method does not - * perform bounds checking - * - * \param[in] index The index of the desired value - * \return T The value at dev_ptr[index] - */ - T operator [] (size_t const &index); - - /*! - * \brief Return a value from device memory. This method performs a - * cudaMemcpy to copy the desired element to the host then returns it. - * Unlike the `[]` overload this method perform bounds checking - * - * \param[in] index The index of the desired value - * \return T The value at dev_ptr[index] - */ - T const at(size_t const index); - - /*! - * \brief Assign a single value in the array. Should generally only be - * used when the pointer points to a scalar value. By default this - * writes `hostValue` to the 0th element of the array. - * - * \param[in] hostValue The value to write to the device array - * \param[in] index The location to write the value to, defaults to zero. - */ - void assign(T const &hostValue, size_t const &index=0); - - /*! - * \brief Resize the device container to contain `newSize` elements. If - * `newSize` is greater than the current size then all the values are - * kept and the rest of the array is default initialized. If `newSize` - * is smaller than the current size then the array is truncated and - * values at locations greater than `newSize` are lost. 
Keeping the - * values in the array requires that the new array be allocated, the - * values be copied, then the old array be freed; as such this method is - * quite slow and can use a large amount of memory. If you don't care - * about the values in the array then use the `reset` method - * - * \param[in] newSize The desired size of the array - */ - void resize(size_t const newSize); - - /*! - * \brief Reset the size of the array. This frees the old array and - * allocates a new one; all values in the array may be lost. The values - * in memory are not initialized and therefore the behaviour of the - * default values is undefined - * - * \param newSize - */ - void reset(size_t const newSize); - - /*! - * \brief Copy the first `arrSize` elements of `arrIn` to the device. - * - * \param[in] arrIn The pointer to the array to be copied to the device - * \param[in] arrSize The number of elements/size of the array to copy - * to the device - */ - void cpyHostToDevice(const T * arrIn, size_t const &arrSize); - - /*! - * \brief Copy the contents of a std::vector to the device - * - * \param[in] vecIn The array whose contents are to be copied - */ - void cpyHostToDevice(std::vector const &vecIn) - {cpyHostToDevice(vecIn.data(), vecIn.size());} - - /*! - * \brief Copy the array from the device to a host array. Checks if the - * host array is large enough based on the `arrSize` parameter. - * - * \param[out] arrOut The pointer to the host array - * \param[in] arrSize The number of elements allocated in the host array - */ - void cpyDeviceToHost(T * arrOut, size_t const &arrSize); - - /*! - * \brief Copy the array from the device to a host std::vector. Checks - * if the host array is large enough. 
- * - * \param[out] vecOut The std::vector to copy the device array into - */ - void cpyDeviceToHost(std::vector &vecOut) - {cpyDeviceToHost(vecOut.data(), vecOut.size());} - - private: - /// The size of the device array - size_t _size; - - /// The pointer to the device array - T *_ptr=nullptr; - - /*! - * \brief Allocate the device side array - * - * \param[in] size The size of the array to allocate - */ - void _allocate(size_t const size) - { - _size=size; - CudaSafeCall(cudaMalloc(&_ptr, size*sizeof(T))); - } - - /*! - * \brief Free the device side array - * - */ - void _deAllocate(){CudaSafeCall(cudaFree(_ptr));} - }; -} // End of cuda_utilities namespace +/*! + * \brief A templatized class to encapsulate a device global memory pointer + * in a std::vector like interface complete with most of the usual methods. + * This class is intended to be used only in host code and does not work + * device side; Passing the pointer to a kernel can be done with the + * `data()` method. This class works for any device side pointer, scalar or + * array valued. + * + * \tparam T Any trivially copyable type where `sizeof(T)` returns correct + * results should work, but non-primitive types have not been tested. + */ +template +class DeviceVector +{ + static_assert(std::is_trivially_copyable_v, + "DeviceVector can only be used with trivially_copyable types due to the internal " + "usage of functions like cudaMemcpy, cudaMemcpyPeer, cudaMemset"); + + public: + /*! + * \brief Construct a new Device Vector object by calling the + * `_allocate` private method + * + * \param[in] size The number of elements desired in the array. Can be + * any positive integer. + * \param[in] initialize (optional) If true then initialize the GPU + * memory to int(0) + */ + DeviceVector(size_t const size, bool const initialize = false); + + /*! 
+ * \brief Destroy the Device Vector object by calling the `_deAllocate` + * private method + * + */ + ~DeviceVector() { _deAllocate(); } + + /* The following are deleted because they currently lead to invalid state. + * (But they can all easily be implemented in the future). + */ + DeviceVector() = delete; + DeviceVector(const DeviceVector &) = delete; + DeviceVector(DeviceVector &&) = delete; + DeviceVector &operator=(const DeviceVector &other) = delete; + DeviceVector &operator=(DeviceVector &&other) = delete; + + /*! + * \brief Get the raw device pointer + * + * \return T* The pointer for the array in global memory + */ + T *data() { return _ptr; } + + /*! + * \brief Get the number of elements in the array. + * + * \return size_t The number of elements in the array + */ + size_t size() { return _size; } + + /*! + * \brief Overload the [] operator to return a value from device memory. + * This method performs a cudaMemcpy to copy the desired element to the + * host then returns it. Unlike the `at()` method this method does not + * perform bounds checking + * + * \param[in] index The index of the desired value + * \return T The value at dev_ptr[index] + */ + T operator[](size_t const &index); + + /*! + * \brief Return a value from device memory. This method performs a + * cudaMemcpy to copy the desired element to the host then returns it. + * Unlike the `[]` overload this method perform bounds checking + * + * \param[in] index The index of the desired value + * \return T The value at dev_ptr[index] + */ + T at(size_t const index); + + /*! + * \brief Assign a single value in the array. Should generally only be + * used when the pointer points to a scalar value. By default this + * writes `hostValue` to the 0th element of the array. + * + * \param[in] hostValue The value to write to the device array + * \param[in] index The location to write the value to, defaults to zero. + */ + void assign(T const &hostValue, size_t const &index = 0); + + /*! 
+ * \brief Resize the device container to contain `newSize` elements. If + * `newSize` is greater than the current size then all the values are + * kept and the rest of the array is default initialized. If `newSize` + * is smaller than the current size then the array is truncated and + * values at locations greater than `newSize` are lost. Keeping the + * values in the array requires that the new array be allocated, the + * values be copied, then the old array be freed; as such this method is + * quite slow and can use a large amount of memory. If you don't care + * about the values in the array then use the `reset` method + * + * \param[in] newSize The desired size of the array + */ + void resize(size_t const newSize); + + /*! + * \brief Reset the size of the array. This frees the old array and + * allocates a new one; all values in the array may be lost. The values + * in memory are not initialized and therefore the behaviour of the + * default values is undefined + * + * \param newSize + */ + void reset(size_t const newSize); + + /*! + * \brief Copy the first `arrSize` elements of `arrIn` to the device. + * + * \param[in] arrIn The pointer to the array to be copied to the device + * \param[in] arrSize The number of elements/size of the array to copy + * to the device + */ + void cpyHostToDevice(const T *arrIn, size_t const &arrSize); + + /*! + * \brief Copy the contents of a std::vector to the device + * + * \param[in] vecIn The array whose contents are to be copied + */ + void cpyHostToDevice(std::vector const &vecIn) { cpyHostToDevice(vecIn.data(), vecIn.size()); } + + /*! + * \brief Copy the array from the device to a host array. Checks if the + * host array is large enough based on the `arrSize` parameter. + * + * \param[out] arrOut The pointer to the host array + * \param[in] arrSize The number of elements allocated in the host array + */ + void cpyDeviceToHost(T *arrOut, size_t const &arrSize); + + /*! 
+ * \brief Copy the array from the device to a host std::vector. Checks + * if the host array is large enough. + * + * \param[out] vecOut The std::vector to copy the device array into + */ + void cpyDeviceToHost(std::vector &vecOut) { cpyDeviceToHost(vecOut.data(), vecOut.size()); } + + private: + /// The size of the device array + size_t _size; + + /// The pointer to the device array + T *_ptr = nullptr; + + /*! + * \brief Allocate the device side array + * + * \param[in] size The size of the array to allocate + */ + void _allocate(size_t const size) + { + _size = size; + GPU_Error_Check(cudaMalloc(&_ptr, _size * sizeof(T))); + } + + /*! + * \brief Free the device side array + * + */ + void _deAllocate() { GPU_Error_Check(cudaFree(_ptr)); } +}; +} // namespace cuda_utilities // ============================================================================= // End declaration of DeviceVector class // ============================================================================= - // ============================================================================= // Definition of DeviceVector class // ============================================================================= namespace cuda_utilities { +// ========================================================================= +// Public Methods +// ========================================================================= - // ========================================================================= - // Public Methods - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::resize(size_t const newSize) - { - // Assign old array to a new pointer - T * oldDevPtr = _ptr; - - // Determine how many elements to copy - size_t const count = std::min(_size, newSize) * sizeof(T); - - // Allocate new array - _allocate(newSize); - - // Copy the values from the old array to the new array - 
CudaSafeCall(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count)); - - // Free the old array - CudaSafeCall(cudaFree(oldDevPtr)); - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::reset(size_t const newSize) - { - _deAllocate(); - _allocate(newSize); - } - // ========================================================================= - - // ========================================================================= - template - T DeviceVector::operator [] (size_t const &index) - { - T hostValue; - CudaSafeCall(cudaMemcpy(&hostValue, - &(_ptr[index]), - sizeof(T), - cudaMemcpyDeviceToHost)); - return hostValue; - } - // ========================================================================= - - // ========================================================================= - template - T const DeviceVector::at(size_t const index) - { - if (index < _size) - { - // Use the overloaded [] operator to grab the value from GPU memory - // into host memory - return (*this)[index]; - } - else - { - throw std::out_of_range("Warning: DeviceVector.at() detected an" - " out of bounds memory access. 
Tried to" - " access element " - + std::to_string(index) - + " of " - + std::to_string(_size)); - } - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::assign(T const &hostValue, size_t const &index) - { - CudaSafeCall(cudaMemcpy(&(_ptr[index]), // destination - &hostValue, // source - sizeof(T), - cudaMemcpyHostToDevice)); - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::cpyHostToDevice(const T * arrIn, size_t const &arrSize) - { - if (arrSize <= _size) - { - CudaSafeCall(cudaMemcpy(_ptr, - arrIn, - arrSize*sizeof(T), - cudaMemcpyHostToDevice)); - } - else - { - throw std::out_of_range("Warning: Couldn't copy array to device," - " device array is too small. Host array" - " size=" - + std::to_string(arrSize) - + ", device array size=" - + std::to_string(arrSize)); - } - - } - // ========================================================================= - - // ========================================================================= - template - void DeviceVector::cpyDeviceToHost(T * arrOut, size_t const &arrSize) - { - if (_size <= arrSize) - { - CudaSafeCall(cudaMemcpy(arrOut, - _ptr, - _size*sizeof(T), - cudaMemcpyDeviceToHost)); - } - else - { - throw std::out_of_range("Warning: Couldn't copy array to host, " - "host array is too small. 
Host array " - "size=" - + std::to_string(arrSize) - + ", device array size=" - + std::to_string(arrSize)); - } - } - // ========================================================================= -} // end namespace cuda_utilities -// ============================================================================= -// End definition of DeviceVector class -// ============================================================================= \ No newline at end of file +// ========================================================================= +template +DeviceVector::DeviceVector(size_t const size, bool const initialize) +{ + _allocate(size); + + if (initialize) { + GPU_Error_Check(cudaMemset(_ptr, 0, _size * sizeof(T))); + } +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::resize(size_t const newSize) +{ + // Assign old array to a new pointer + T *oldDevPtr = _ptr; + + // Determine how many elements to copy + size_t const count = std::min(_size, newSize) * sizeof(T); + + // Allocate new array + _allocate(newSize); + + // Copy the values from the old array to the new array + GPU_Error_Check(cudaMemcpyPeer(_ptr, 0, oldDevPtr, 0, count)); + + // Free the old array + GPU_Error_Check(cudaFree(oldDevPtr)); +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::reset(size_t const newSize) +{ + _deAllocate(); + _allocate(newSize); +} +// ========================================================================= + +// ========================================================================= +template +T DeviceVector::operator[](size_t const &index) +{ + T hostValue; + GPU_Error_Check(cudaMemcpy(&hostValue, &(_ptr[index]), sizeof(T), cudaMemcpyDeviceToHost)); + return hostValue; +} +// 
========================================================================= + +// ========================================================================= +template +T DeviceVector::at(size_t const index) +{ + if (index < _size) { + // Use the overloaded [] operator to grab the value from GPU memory + // into host memory + return (*this)[index]; + } else { + throw std::out_of_range( + "Warning: DeviceVector.at() detected an" + " out of bounds memory access. Tried to" + " access element " + + std::to_string(index) + " of " + std::to_string(_size)); + } +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::assign(T const &hostValue, size_t const &index) +{ + GPU_Error_Check(cudaMemcpy(&(_ptr[index]), // destination + &hostValue, // source + sizeof(T), cudaMemcpyHostToDevice)); +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::cpyHostToDevice(const T *arrIn, size_t const &arrSize) +{ + if (arrSize <= _size) { + GPU_Error_Check(cudaMemcpy(_ptr, arrIn, arrSize * sizeof(T), cudaMemcpyHostToDevice)); + } else { + throw std::out_of_range( + "Warning: Couldn't copy array to device," + " device array is too small. Host array" + " size=" + + std::to_string(arrSize) + ", device array size=" + std::to_string(arrSize)); + } +} +// ========================================================================= + +// ========================================================================= +template +void DeviceVector::cpyDeviceToHost(T *arrOut, size_t const &arrSize) +{ + if (_size <= arrSize) { + GPU_Error_Check(cudaMemcpy(arrOut, _ptr, _size * sizeof(T), cudaMemcpyDeviceToHost)); + } else { + throw std::out_of_range( + "Warning: Couldn't copy array to host, " + "host array is too small. 
Host array " + "size=" + + std::to_string(arrSize) + ", device array size=" + std::to_string(arrSize)); + } +} +// ========================================================================= +} // end namespace cuda_utilities + // ============================================================================= + // End definition of DeviceVector class + // ============================================================================= \ No newline at end of file diff --git a/src/utils/DeviceVector_tests.cu b/src/utils/DeviceVector_tests.cu index 26a63dbca..6acd84308 100644 --- a/src/utils/DeviceVector_tests.cu +++ b/src/utils/DeviceVector_tests.cu @@ -1,195 +1,195 @@ /*! - * \file device_vector_tests.cu + * \file DeviceVector_tests.cu * \author Robert 'Bob' Caddy (rvc@pitt.edu) * \brief Tests for the DeviceVector class * */ // STL Includes -#include -#include #include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes #include "../global/global.h" -#include "../utils/testing_utilities.h" #include "../utils/DeviceVector.h" +#include "../utils/testing_utilities.h" - -namespace // Anonymous namespace +namespace // Anonymous namespace { - template - void checkPointerAttributes(cuda_utilities::DeviceVector &devVector) - { - // Get the pointer information - cudaPointerAttributes ptrAttributes; - CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); - - // Warning strings - std::string typeMessage = "ptrAttributes.type should be 2 since " - "that indicates type cudaMemoryTypeDevice. 
" - "0 is cudaMemoryTypeUnregistered, " - "1 is cudaMemoryTypeHost, and " - "3 is cudaMemoryTypeManaged"; - std::string const deviceMessage = "The pointer should be on device 0"; - std::string const devPtrMessage = "The device pointer is nullptr"; - std::string const hostPtrMessage = "The host pointer is not nullptr"; - - // Check that the pointer information is correct - #ifdef O_HIP - typeMessage = "ptrAttributes.memoryType should be 1 since that indicates a HIP device pointer."; - EXPECT_EQ(1, ptrAttributes.memoryType) << typeMessage; - #else // O_HIP is not defined i.e. we're using CUDA - EXPECT_EQ(2, ptrAttributes.type) << typeMessage; - #endif // O_HIP - EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; - EXPECT_NE(nullptr, ptrAttributes.devicePointer) << devPtrMessage; - EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; - } -} // Anonymous namespace +template +void Check_Pointer_Attributes(cuda_utilities::DeviceVector &devVector) +{ + // Get the pointer information + cudaPointerAttributes ptrAttributes; + GPU_Error_Check(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); + + // Warning strings + std::string typeMessage = + "ptrAttributes.type should be 2 since " + "that indicates type cudaMemoryTypeDevice. " + "0 is cudaMemoryTypeUnregistered, " + "1 is cudaMemoryTypeHost, and " + "3 is cudaMemoryTypeManaged"; + std::string const deviceMessage = "The pointer should be on device 0"; + std::string const devPtrMessage = "The device pointer is nullptr"; + std::string const hostPtrMessage = "The host pointer is not nullptr"; + +// Check that the pointer information is correct +#ifdef O_HIP + typeMessage = + "ptrAttributes.memoryType should be 1 since that indicates a HIP device " + "pointer."; + EXPECT_EQ(1, ptrAttributes.memoryType) << typeMessage; +#else // O_HIP is not defined i.e. 
we're using CUDA + EXPECT_EQ(2, ptrAttributes.type) << typeMessage; +#endif // O_HIP + EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; + EXPECT_NE(nullptr, ptrAttributes.devicePointer) << devPtrMessage; + EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; +} +} // Anonymous namespace // ============================================================================= // Tests for expected behavior // ============================================================================= -TEST(tALLDeviceVectorConstructor, - CheckConstructorDataAndSizeExpectProperAllocationAndValues) +TEST(tALLDeviceVectorConstructor, CheckConstructorDataAndSizeExpectProperAllocationAndValues) { - // Initialize the DeviceVector - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; + // Initialize the DeviceVector + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; - // Check that the size is correct - EXPECT_EQ(vectorSize, devVector.size()); + // Check that the size is correct + EXPECT_EQ(vectorSize, devVector.size()); - // Check the pointer information - checkPointerAttributes(devVector); + // Check the pointer information + Check_Pointer_Attributes(devVector); } -TEST(tALLDeviceVectorDestructor, - CheckDestructorExpectProperDeallocation) +TEST(tALLDeviceVectorDestructor, CheckDestructorExpectProperDeallocation) { - // Initialize the DeviceVector - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - - // Destruct the object - devVector.~DeviceVector(); - - // Get the pointer information - cudaPointerAttributes ptrAttributes; - CudaSafeCall(cudaPointerGetAttributes(&ptrAttributes, devVector.data())); - - // Warning strings - std::string typeMessage = "ptrAttributes.type should be 0 since " - "that indicates type cudaMemoryTypeUnregistered" - "0 is cudaMemoryTypeUnregistered, " - "1 is cudaMemoryTypeHost, " - "2 is cudaMemoryTypeDevice, and" - "3 is cudaMemoryTypeManaged"; - 
std::string deviceMessage = "The pointer should be null which is device -2"; - std::string const devPtrMessage = "The device pointer is nullptr"; - std::string const hostPtrMessage = "The host pointer is not nullptr"; - - // Check that the pointer information is correct - #ifdef O_HIP - typeMessage = "ptrAttributes.memoryType should be 1 since that indicates a HIP device pointer."; - deviceMessage = "The pointer should be 0"; - EXPECT_EQ(0, ptrAttributes.memoryType) << typeMessage; - EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; - #else // O_HIP is not defined i.e. we're using CUDA - EXPECT_EQ(0, ptrAttributes.type) << typeMessage; - EXPECT_EQ(-2, ptrAttributes.device) << deviceMessage; - #endif // O_HIP - EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage; - EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; + // Initialize the DeviceVector + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + + // Destruct the object + devVector.~DeviceVector(); + + // Get the pointer information + cudaPointerAttributes ptrAttributes; + cudaPointerGetAttributes(&ptrAttributes, devVector.data()); + + // Warning strings + std::string typeMessage = + "ptrAttributes.type should be 0 since " + "that indicates type cudaMemoryTypeUnregistered" + "0 is cudaMemoryTypeUnregistered, " + "1 is cudaMemoryTypeHost, " + "2 is cudaMemoryTypeDevice, and" + "3 is cudaMemoryTypeManaged"; + std::string deviceMessage = "The pointer should be null which is device -2"; + std::string const devPtrMessage = "The device pointer is nullptr"; + std::string const hostPtrMessage = "The host pointer is not nullptr"; + +// Check that the pointer information is correct +#ifdef O_HIP + typeMessage = + "ptrAttributes.memoryType should be 1 since that indicates a HIP device " + "pointer."; + deviceMessage = "The pointer should be 0"; + EXPECT_EQ(0, ptrAttributes.memoryType) << typeMessage; + EXPECT_EQ(0, ptrAttributes.device) << deviceMessage; 
+#else // O_HIP is not defined i.e. we're using CUDA + EXPECT_EQ(0, ptrAttributes.type) << typeMessage; + EXPECT_EQ(-2, ptrAttributes.device) << deviceMessage; +#endif // O_HIP + EXPECT_EQ(nullptr, ptrAttributes.devicePointer) << devPtrMessage; + EXPECT_EQ(nullptr, ptrAttributes.hostPointer) << hostPtrMessage; + + // Reconstruct DeviceVector object to avoid error + new (&devVector) cuda_utilities::DeviceVector{vectorSize}; } TEST(tALLDeviceVectorStdVectorHostToDeviceCopyAndIndexing, CheckDeviceMemoryValuesAndIndexingOperationsExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Check the values in device memory with both the .at() method and - // overloaded [] operator - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), devVector.at(i)); - EXPECT_EQ(stdVec.at(i), devVector[i]); - } + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Check the values in device memory with both the .at() method and + // overloaded [] operator + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), devVector.at(i)); + EXPECT_EQ(stdVec.at(i), devVector[i]); + } } TEST(tALLDeviceVectorArrayHostToDeviceCopyAndIndexing, CheckDeviceMemoryValuesAndIndexingOperationsExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec.data(), 
stdVec.size()); - - // Check the values in device memory with both the .at() method and - // overloaded [] operator - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), devVector.at(i)); - EXPECT_EQ(stdVec.at(i), devVector[i]); - } + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec.data(), stdVec.size()); + + // Check the values in device memory with both the .at() method and + // overloaded [] operator + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), devVector.at(i)); + EXPECT_EQ(stdVec.at(i), devVector[i]); + } } -TEST(tALLDeviceVectorArrayAssignmentMethod, - AssignSingleValuesExpectCorrectMemoryValues) +TEST(tALLDeviceVectorArrayAssignmentMethod, AssignSingleValuesExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; - // Perform assignment - devVector.assign(13); - devVector.assign(17,4); + // Perform assignment + devVector.assign(13); + devVector.assign(17, 4); - // Check the values in device memory - EXPECT_EQ(13, devVector.at(0)); - EXPECT_EQ(17, devVector.at(4)); + // Check the values in device memory + EXPECT_EQ(13, devVector.at(0)); + EXPECT_EQ(17, devVector.at(4)); } -TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, - CheckHostMemoryValuesExpectCorrectMemoryValues) +TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize), hostVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to 
the device memory - devVector.cpyHostToDevice(stdVec); - - // Copy the values to the host memory - devVector.cpyDeviceToHost(hostVec); - - // Check the values - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), hostVec.at(i)); - } + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize), hostVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Copy the values to the host memory + devVector.cpyDeviceToHost(hostVec); + + // Check the values + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), hostVec.at(i)); + } } -TEST(tALLDeviceVectorArrayDeviceToHostCopy, - CheckHostMemoryValuesExpectCorrectMemoryValues) +TEST(tALLDeviceVectorArrayDeviceToHostCopy, CheckHostMemoryValuesExpectCorrectMemoryValues) { // Initialize the vectors size_t const vectorSize = 10; @@ -204,145 +204,138 @@ TEST(tALLDeviceVectorArrayDeviceToHostCopy, devVector.cpyDeviceToHost(hostVec.data(), hostVec.size()); // Check the values - for (size_t i = 0; i < vectorSize; i++) - { - EXPECT_EQ(stdVec.at(i), hostVec.at(i)); + for (size_t i = 0; i < vectorSize; i++) { + EXPECT_EQ(stdVec.at(i), hostVec.at(i)); } } -TEST(tALLDeviceVectorReset, - SetNewSizeExpectCorrectSize) +TEST(tALLDeviceVectorReset, SetNewSizeExpectCorrectSize) { - // Initialize the vectors - size_t const vectorSize = 10; - size_t const newSize = 20; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize), newVec(newSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - std::iota(newVec.begin(), newVec.end(), 20); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Reset the vector - devVector.reset(newSize); - - // Check the size - EXPECT_EQ(newSize, devVector.size()); - - // Check the pointer - checkPointerAttributes(devVector); - - // Copy the new 
values into device memory - devVector.cpyHostToDevice(newVec); - - // Check the values - for (size_t i = 0; i < newSize; i++) - { - EXPECT_EQ(newVec.at(i), devVector.at(i)); - } + // Initialize the vectors + size_t const vectorSize = 10; + size_t const newSize = 20; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize), newVec(newSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + std::iota(newVec.begin(), newVec.end(), 20); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Reset the vector + devVector.reset(newSize); + + // Check the size + EXPECT_EQ(newSize, devVector.size()); + + // Check the pointer + Check_Pointer_Attributes(devVector); + + // Copy the new values into device memory + devVector.cpyHostToDevice(newVec); + + // Check the values + for (size_t i = 0; i < newSize; i++) { + EXPECT_EQ(newVec.at(i), devVector.at(i)); + } } -TEST(tALLDeviceVectorResize, - SetLargerSizeExpectCorrectSize) +TEST(tALLDeviceVectorResize, SetLargerSizeExpectCorrectSize) { - // Initialize the vectors - size_t const originalSize = 10; - size_t const newSize = 20; - cuda_utilities::DeviceVector devVector{originalSize}; - std::vector stdVec(originalSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Reset the vector - devVector.resize(newSize); - - // Check the size - EXPECT_EQ(newSize, devVector.size()); - - // Check the pointer - checkPointerAttributes(devVector); - - // Check the values - for (size_t i = 0; i < originalSize; i++) - { - double const fiducialValue = (i < stdVec.size())? 
stdVec.at(i): 0; - EXPECT_EQ(fiducialValue, devVector.at(i)); - } + // Initialize the vectors + size_t const originalSize = 10; + size_t const newSize = 20; + cuda_utilities::DeviceVector devVector{originalSize}; + std::vector stdVec(originalSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Reset the vector + devVector.resize(newSize); + + // Check the size + EXPECT_EQ(newSize, devVector.size()); + + // Check the pointer + Check_Pointer_Attributes(devVector); + + // Check the values + for (size_t i = 0; i < originalSize; i++) { + double const fiducialValue = (i < stdVec.size()) ? stdVec.at(i) : 0; + EXPECT_EQ(fiducialValue, devVector.at(i)); + } } -TEST(tALLDeviceVectorResize, - SetSmallerSizeExpectCorrectSize) +TEST(tALLDeviceVectorResize, SetSmallerSizeExpectCorrectSize) { - // Initialize the vectors - size_t const vectorSize = 10; - size_t const newSize = 5; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); - - // Reset the vector - devVector.resize(newSize); - - // Check the size - EXPECT_EQ(newSize, devVector.size()); - - // Check the pointer - checkPointerAttributes(devVector); - - // Check the values - for (size_t i = 0; i < newSize; i++) - { - EXPECT_EQ(stdVec.at(i), devVector.at(i)); - } + // Initialize the vectors + size_t const vectorSize = 10; + size_t const newSize = 5; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); + + // Reset the vector + devVector.resize(newSize); + + // Check the size + EXPECT_EQ(newSize, devVector.size()); + + // Check the pointer + Check_Pointer_Attributes(devVector); + + // Check the values + for (size_t i = 0; i < 
newSize; i++) { + EXPECT_EQ(stdVec.at(i), devVector.at(i)); + } } // ============================================================================= // Tests for exceptions // ============================================================================= -TEST(tALLDeviceVectorAt, - OutOfBoundsAccessExpectThrowOutOfRange) +TEST(tALLDeviceVectorAt, OutOfBoundsAccessExpectThrowOutOfRange) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); - // Copy the value to the device memory - devVector.cpyHostToDevice(stdVec); + // Copy the value to the device memory + devVector.cpyHostToDevice(stdVec); - // Check that the .at() method throws the correct exception - EXPECT_THROW(devVector.at(100), std::out_of_range); + // Check that the .at() method throws the correct exception + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto) + EXPECT_THROW(devVector.at(100), std::out_of_range); } -TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, - OutOfBoundsCopyExpectThrowOutOfRange) +TEST(tALLDeviceVectorStdVectorHostToDeviceCopy, OutOfBoundsCopyExpectThrowOutOfRange) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(2*vectorSize); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range); + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(2 * vectorSize); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + // 
NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto) + EXPECT_THROW(devVector.cpyHostToDevice(stdVec), std::out_of_range); } -TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, - OutOfBoundsCopyExpectThrowOutOfRange) +TEST(tALLDeviceVectorStdVectorDeviceToHostCopy, OutOfBoundsCopyExpectThrowOutOfRange) { - // Initialize the vectors - size_t const vectorSize = 10; - cuda_utilities::DeviceVector devVector{vectorSize}; - std::vector stdVec(vectorSize/2); - std::iota(stdVec.begin(), stdVec.end(), 0); - - // Copy the value to the device memory - EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range); + // Initialize the vectors + size_t const vectorSize = 10; + cuda_utilities::DeviceVector devVector{vectorSize}; + std::vector stdVec(vectorSize / 2); + std::iota(stdVec.begin(), stdVec.end(), 0); + + // Copy the value to the device memory + // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto,hicpp-avoid-goto) + EXPECT_THROW(devVector.cpyDeviceToHost(stdVec), std::out_of_range); } diff --git a/src/utils/cuda_utilities.cpp b/src/utils/cuda_utilities.cpp index a924b3f76..142266159 100644 --- a/src/utils/cuda_utilities.cpp +++ b/src/utils/cuda_utilities.cpp @@ -1,5 +1,37 @@ +/*! 
+ * \file cuda_utilities.cpp + * \brief Implementation file for cuda_utilities.h + * + */ #include "../utils/cuda_utilities.h" -namespace cuda_utilities { +#include +#include -} // end namespace cuda_utilities +#include "../io/io.h" +#include "../mpi/mpi_routines.h" + +namespace cuda_utilities +{ +void Print_GPU_Memory_Usage(std::string const &additional_text) +{ + // Get the memory usage + size_t gpu_free_memory, gpu_total_memory; + GPU_Error_Check(cudaMemGetInfo(&gpu_free_memory, &gpu_total_memory)); + + // Assuming that all GPUs in the system have the same amount of memory + size_t const gpu_used_memory = Reduce_size_t_Max(gpu_total_memory - gpu_free_memory); + + Real const percent_used = 100.0 * (static_cast(gpu_used_memory) / static_cast(gpu_total_memory)); + + // Prep the message to print + std::stringstream output_message_stream; + output_message_stream << std::fixed << std::setprecision(2); + output_message_stream << "Percentage of GPU memory used: " << percent_used << "%. GPU memory used " + << std::to_string(gpu_used_memory) << ", GPU total memory " << std::to_string(gpu_total_memory) + << additional_text << std::endl; + std::string output_message = output_message_stream.str(); + + chprintf(output_message.c_str()); +} +} // end namespace cuda_utilities diff --git a/src/utils/cuda_utilities.h b/src/utils/cuda_utilities.h index 9c07a95a6..85927d532 100644 --- a/src/utils/cuda_utilities.h +++ b/src/utils/cuda_utilities.h @@ -7,93 +7,131 @@ #pragma once +#include + // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" - namespace cuda_utilities { - /*! 
- * \brief Compute the x, y, and z indices based off of the 1D index - * - * \param[in] id The 1D index - * \param[in] nx The total number of cells in the x direction - * \param[in] ny The total number of cells in the y direction - * \param[out] xid The x index - * \param[out] yid The y index - * \param[out] zid The z index - */ - inline __host__ __device__ void compute3DIndices(int const &id, - int const &nx, - int const &ny, - int &xid, - int &yid, - int &zid) - { - zid = id / (nx * ny); - yid = (id - zid * nx * ny) / nx; - xid = id - zid * nx * ny - yid * nx; - } +/*! + * \brief Compute the x, y, and z indices based off of the 1D index + * + * \param[in] id The 1D index + * \param[in] nx The total number of cells in the x direction + * \param[in] ny The total number of cells in the y direction + * \param[out] xid The x index + * \param[out] yid The y index + * \param[out] zid The z index + */ +inline __host__ __device__ void compute3DIndices(int const &id, int const &nx, int const &ny, int &xid, int &yid, + int &zid) +{ + zid = id / (nx * ny); + yid = (id - zid * nx * ny) / nx; + xid = id - zid * nx * ny - yid * nx; +} - /*! - * \brief Compute the 1D index based off of the 3D indices - * - * \param xid The x index - * \param yid The y index - * \param zid The z index - * \param nx The total number of cells in the x direction - * \param ny The total number of cells in the y direction - * \return int The 1D index - */ - inline __host__ __device__ int compute1DIndex(int const &xid, - int const &yid, - int const &zid, - int const &nx, - int const &ny) - { - return xid + yid*nx + zid*nx*ny; - } +/*! 
+ * \brief Compute the 1D index based off of the 3D indices + * + * \param xid The x index + * \param yid The y index + * \param zid The z index + * \param nx The total number of cells in the x direction + * \param ny The total number of cells in the y direction + * \return int The 1D index + */ +inline __host__ __device__ int compute1DIndex(int const &xid, int const &yid, int const &zid, int const &nx, + int const &ny) +{ + return xid + yid * nx + zid * nx * ny; +} - inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz, int &is, int &ie, int &js, int &je, int &ks, int &ke) { - is = n_ghost; - ie = nx - n_ghost; - if (ny == 1) { - js = 0; - je = 1; - } else { - js = n_ghost; - je = ny - n_ghost; - } - if (nz == 1) { - ks = 0; - ke = 1; - } else { - ks = n_ghost; - ke = nz - n_ghost; - } - } +inline __host__ __device__ void Get_Real_Indices(int const &n_ghost, int const &nx, int const &ny, int const &nz, + int &is, int &ie, int &js, int &je, int &ks, int &ke) +{ + is = n_ghost; + ie = nx - n_ghost; + if (ny == 1) { + js = 0; + je = 1; + } else { + js = n_ghost; + je = ny - n_ghost; + } + if (nz == 1) { + ks = 0; + ke = 1; + } else { + ks = n_ghost; + ke = nz - n_ghost; + } +} - // ========================================================================= - /*! - * \brief Set the value that `pointer` points at in GPU memory to `value`. - * This only sets the first value in memory so if `pointer` points to an - * array then only `pointer[0]` will be set; i.e. this effectively does - * `pointer = &value` - * - * \tparam T Any scalar type - * \param[in] pointer The location in GPU memory - * \param[in] value The value to set `*pointer` to - */ - template - void setScalarDeviceMemory(T *pointer, T const value) - { - CudaSafeCall( - cudaMemcpy(pointer, // destination - &value, // source - sizeof(T), - cudaMemcpyHostToDevice)); +/*! 
+ * \brief Initialize GPU memory + * + * \param[in] ptr The pointer to GPU memory + * \param[in] N The size of the array in bytes + */ +inline void initGpuMemory(Real *ptr, size_t N) { GPU_Error_Check(cudaMemset(ptr, 0, N)); } + +// ===================================================================== +/*! + * \brief Struct to determine the optimal number of blocks and threads + * per block to use when launching a kernel. The member + * variables are `threadsPerBlock` and `numBlocks` which are chosen with + * the occupancy API. + * + */ +template +struct AutomaticLaunchParams { + public: + /*! + * \brief Construct a new AutomaticLaunchParams object. By default it + * generates values of numBlocks and threadsPerBlock suitable for a + * kernel with a grid-stride loop. For a kernel with one thread per + * element set the optional `numElements` argument to the number of + * elements + * + * \param[in] kernel The kernel to determine the launch parameters for + * \param[in] numElements The number of elements in the array that + the kernel operates on + */ + AutomaticLaunchParams(T &kernel, size_t numElements = 0) + { + cudaOccupancyMaxPotentialBlockSize(&numBlocks, &threadsPerBlock, kernel, 0, 0); + + if (numElements > 0) { + // This line is needed to check that threadsPerBlock isn't zero. Somewhere inside + // cudaOccupancyMaxPotentialBlockSize threadsPerBlock can be zero according to clang-tidy so this line sets it to + // a more reasonable value + threadsPerBlock = (threadsPerBlock == 0) ? 
TPB : threadsPerBlock; + + // Compute the number of blocks + numBlocks = (numElements + threadsPerBlock - 1) / threadsPerBlock; } - // ========================================================================= -} \ No newline at end of file + } + + /// Defaulted Destructor + ~AutomaticLaunchParams() = default; + + /// The maximum number of threads per block that the device supports + int threadsPerBlock; + /// The maximum number of scheduleable blocks on the device + int numBlocks; +}; +// ===================================================================== + +// ===================================================================== +/*! + * \brief Print the current GPU memory usage to standard out + * + * \param additional_text Any additional text to be appended to the end of the message + */ +void Print_GPU_Memory_Usage(std::string const &additional_text = ""); +// ===================================================================== +} // end namespace cuda_utilities \ No newline at end of file diff --git a/src/utils/cuda_utilities_tests.cpp b/src/utils/cuda_utilities_tests.cpp index ddefebfd7..ab35d28d2 100644 --- a/src/utils/cuda_utilities_tests.cpp +++ b/src/utils/cuda_utilities_tests.cpp @@ -1,23 +1,24 @@ /*! 
* \file cuda_utilities_tests.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu) - * \brief Tests for the contents of cuda_utilities.h and cuda_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie + * (helenarichie@pitt.edu) \brief Tests for the contents of cuda_utilities.h and + * cuda_utilities.cpp * */ // STL Includes -#include -#include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/cuda_utilities.h" #include "../global/global.h" +#include "../utils/cuda_utilities.h" +#include "../utils/testing_utilities.h" /* PCM : n_ghost = 2 @@ -31,112 +32,88 @@ // Local helper functions namespace { - struct TestParams - { - std::vector n_ghost {2, 2, 3, 4}; - std::vector nx {100, 2048, 2048, 2048}; - std::vector ny {1, 2048, 2048, 2048}; - std::vector nz {1, 4096, 4096, 4096}; - std::vector names {"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case", "Large PPMP/PPMC case"}; - - }; -} - -TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector> fiducial_indices {{2, 98, 0, 1, 0, 1}, - {2, 2046, 2, 2046, 2, 4094}, - {3, 2045, 3, 2045, 3, 4093}, - {4, 2044, 4, 2044, 4, 4092}}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - int is; - int ie; - int js; - int je; - int ks; - int ke; - cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), parameters.nz.at(i), is, ie, js, je, ks, ke); - - std::vector index_names {"is", "ie", "js", "je", "ks", "ke"}; - std::vector test_indices {is, ie, js, je, ks, ke}; - - for (size_t j = 0; j < test_indices.size(); j++) - { - testingUtilities::checkResults(fiducial_indices[i][j], test_indices[j], index_names[j] + " " + parameters.names[i]); - } 
+struct TestParams { + std::vector n_ghost{2, 2, 3, 4}; + std::vector nx{100, 2048, 2048, 2048}; + std::vector ny{1, 2048, 2048, 2048}; + std::vector nz{1, 4096, 4096, 4096}; + std::vector names{"Single-cell 3D PCM/PLMP case", "Large 3D PCM/PLMP case", "Large PLMC case", + "Large PPMP/PPMC case"}; +}; +} // namespace + +TEST(tHYDROCudaUtilsGetRealIndices, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector> fiducial_indices{ + {2, 98, 0, 1, 0, 1}, {2, 2046, 2, 2046, 2, 4094}, {3, 2045, 3, 2045, 3, 4093}, {4, 2044, 4, 2044, 4, 4092}}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + int is; + int ie; + int js; + int je; + int ks; + int ke; + cuda_utilities::Get_Real_Indices(parameters.n_ghost.at(i), parameters.nx.at(i), parameters.ny.at(i), + parameters.nz.at(i), is, ie, js, je, ks, ke); + + std::vector index_names{"is", "ie", "js", "je", "ks", "ke"}; + std::vector test_indices{is, ie, js, je, ks, ke}; + + for (size_t j = 0; j < test_indices.size(); j++) { + testing_utilities::Check_Results(fiducial_indices[i][j], test_indices[j], + index_names[j] + " " + parameters.names[i]); } + } } // ============================================================================= -TEST(tALLCompute3DIndices, - CorrectInputExpectCorrectOutput) +TEST(tALLCompute3DIndices, CorrectInputExpectCorrectOutput) { - // Parameters - int const id = 723; - int const nx = 34; - int const ny = 14; - - // Fiducial Data - int const fiducialXid = 9; - int const fiducialYid = 7; - int const fiducialZid = 1; - - // Test Variables - int testXid; - int testYid; - int testZid; - - // Get test data - cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid); - - EXPECT_EQ(fiducialXid, testXid); - EXPECT_EQ(fiducialYid, testYid); - EXPECT_EQ(fiducialZid, testZid); + // Parameters + int const id = 723; + int const nx = 34; + int const ny = 14; + + // Fiducial Data + int const fiducialXid = 9; + int const fiducialYid = 7; + int const fiducialZid = 1; + + // 
Test Variables + int testXid; + int testYid; + int testZid; + + // Get test data + cuda_utilities::compute3DIndices(id, nx, ny, testXid, testYid, testZid); + + EXPECT_EQ(fiducialXid, testXid); + EXPECT_EQ(fiducialYid, testYid); + EXPECT_EQ(fiducialZid, testZid); } // ============================================================================= // ============================================================================= -TEST(tALLCompute1DIndex, - CorrectInputExpectCorrectOutput) +TEST(tALLCompute1DIndex, CorrectInputExpectCorrectOutput) { - // Parameters - int const xid = 72; - int const yid = 53; - int const zid = 14; - int const nx = 128; - int const ny = 64; - - // Fiducial Data - int const fiducialId = 121544; - - // Test Variable - int testId; + // Parameters + int const xid = 72; + int const yid = 53; + int const zid = 14; + int const nx = 128; + int const ny = 64; - // Get test data - testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - - EXPECT_EQ(fiducialId, testId); -} -// ============================================================================= - -// ============================================================================= -TEST(tALLSetScalarDeviceMemory, - TypeDoubleInputExpectCorrectValueSet) -{ - double value = 173.246; - double *dev_ptr, host_val; - CudaSafeCall(cudaMalloc(&dev_ptr, sizeof(double))); + // Fiducial Data + int const fiducialId = 121544; - cuda_utilities::setScalarDeviceMemory(dev_ptr, value); + // Test Variable + int testId; - CudaSafeCall( - cudaMemcpy(&host_val, // destination - dev_ptr, // source - sizeof(double), - cudaMemcpyDeviceToHost)); + // Get test data + testId = cuda_utilities::compute1DIndex(xid, yid, zid, nx, ny); - EXPECT_EQ(value, host_val); + EXPECT_EQ(fiducialId, testId); } // ============================================================================= diff --git a/src/utils/debug_utilities.cu b/src/utils/debug_utilities.cu new file mode 100644 index 000000000..20720583f --- /dev/null +++ 
b/src/utils/debug_utilities.cu @@ -0,0 +1,60 @@ +#include + +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../io/io.h" // provides chprintf +#include "../utils/error_handling.h" // provides chexit + +__global__ void Dump_Values_Kernel(Real* device_array, int array_size, int marker) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= array_size) { + return; + } + kernel_printf("Dump Values: marker %d tid %d value %g \n", marker, tid, device_array[tid]); +} + +/* + Prints out all values of a device_array + */ +void Dump_Values(Real* device_array, int array_size, int marker) +{ + int ngrid = (array_size + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Dump_Values_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, marker); +} + +__global__ void Check_For_Nan_Kernel(Real* device_array, int array_size, int check_num, bool* out_bool) +{ + int tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= array_size) { + return; + } + if (device_array[tid] == device_array[tid]) { + return; + } + out_bool[0] = true; + kernel_printf("Check_For_Nan_Kernel found Nan Checknum: %d Thread: %d\n", check_num, tid); +} + +/* + Checks a device_array for NaN and prints/exits if found + */ +void Check_For_Nan(Real* device_array, int array_size, int check_num) +{ + bool host_out_bool[1] = {false}; + bool* out_bool; + GPU_Error_Check(cudaMalloc((void**)&out_bool, sizeof(bool))); + cudaMemcpy(out_bool, host_out_bool, sizeof(bool), cudaMemcpyHostToDevice); + int ngrid = (array_size + TPB - 1) / TPB; + dim3 dim1dGrid(ngrid, 1, 1); + dim3 dim1dBlock(TPB, 1, 1); + hipLaunchKernelGGL(Check_For_Nan_Kernel, dim1dGrid, dim1dBlock, 0, 0, device_array, array_size, check_num, out_bool); + cudaMemcpy(host_out_bool, out_bool, sizeof(bool), cudaMemcpyDeviceToHost); + cudaFree(out_bool); + + if (host_out_bool[0]) { + chexit(-1); + } +} diff --git a/src/utils/error_check_cuda.cu 
b/src/utils/error_check_cuda.cu index 32aa2274e..153106b10 100644 --- a/src/utils/error_check_cuda.cu +++ b/src/utils/error_check_cuda.cu @@ -1,30 +1,30 @@ /*! \file error_check_cuda.cu * \brief Error Check Cuda */ -#ifdef CUDA - +#include #include #include -#include -#include "../utils/gpu.hpp" + #include "../global/global.h" #include "../global/global_cuda.h" #include "../io/io.h" #include "../utils/error_check_cuda.h" +#include "../utils/gpu.hpp" - -__global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, int *return_value){ - +__global__ void Check_Value_Along_Axis(Real *dev_array, int n_field, int nx, int ny, int nz, int n_ghost, + int *return_value) +{ int tid_j = blockIdx.x * blockDim.x + threadIdx.x; int tid_k = blockIdx.y * blockDim.y + threadIdx.y; - - if ( blockDim.x != N_Y || blockDim.y != N_Z ){ - if ( tid_j == 0 && tid_k == 0 ) printf("ERROR CHECK: Block Dimension Error \n" ); + if (blockDim.x != N_Y || blockDim.y != N_Z) { + if (tid_j == 0 && tid_k == 0) { + printf("ERROR CHECK: Block Dimension Error \n"); + } return; } - __shared__ Real sh_data[N_Z*N_Y]; + __shared__ Real sh_data[N_Z * N_Y]; // int n_cells, indx_x, indx_3d, indx_2d; Real field_value; @@ -35,45 +35,38 @@ __global__ void Check_Value_Along_Axis( Real *dev_array, int n_field, int nx, in int error = 0; indx_x = 0; - for ( indx_x=0; indx_x -void chexit(int code) -{ - if(code==0) - { +#include +#include +#include +#include +#include + +#ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" +[[noreturn]] void chexit(int code) +{ + if (code == 0) { /*exit normally*/ MPI_Finalize(); exit(code); - }else{ - + } else { /*exit with non-zero error code*/ - MPI_Abort(MPI_COMM_WORLD,code); + MPI_Abort(MPI_COMM_WORLD, code); exit(code); - } } #else /*MPI_CHOLLA*/ -void chexit(int code) +[[noreturn]] void chexit(int code) { /*exit using code*/ exit(code); } #endif /*MPI_CHOLLA*/ + +void Check_Configuration(Parameters const& P) +{ +// General Checks +// 
============== +#ifndef GIT_HASH + #error "GIT_HASH is not defined" +#endif //! GIT_HASH + + // Check that GIT_HASH is the correct length. It needs to be 41 and not 40 since strings are null terminated + static_assert(sizeof(GIT_HASH) == 41); + +#ifndef MACRO_FLAGS + #error "MACRO_FLAGS is not defined" +#endif //! MACRO_FLAGS + + // Check that MACRO_FLAGS has contents + static_assert(sizeof(MACRO_FLAGS) > 1); + +// Can only have one integrator enabled +#if ((defined(VL) + defined(CTU) + defined(SIMPLE)) != 1) + #error "Only one integrator can be enabled at a time." +#endif // Only one integrator check + + // Check the boundary conditions + auto Check_Boundary = [](int const& boundary, std::string const& direction) { + bool is_allowed_bc = boundary >= 0 and boundary <= 4; + CHOLLA_ASSERT(is_allowed_bc, + "WARNING: Possibly invalid boundary conditions for direction: %s flag: %d. Must " + "select between 0 (no boundary), 1 (periodic), 2 (reflective), 3 (transmissive), " + "4 (custom), 5 (mpi).", + direction.c_str(), boundary); + }; + Check_Boundary(P.xl_bcnd, "xl_bcnd"); + Check_Boundary(P.xu_bcnd, "xu_bcnd"); + Check_Boundary(P.yl_bcnd, "yl_bcnd"); + Check_Boundary(P.yu_bcnd, "yu_bcnd"); + Check_Boundary(P.zl_bcnd, "zl_bcnd"); + Check_Boundary(P.zu_bcnd, "zu_bcnd"); + + // warn if error checking is disabled +#ifndef DISABLE_GPU_ERROR_CHECKING + // NOLINTNEXTLINE(clang-diagnostic-#warnings) + #warning "CUDA error checking is disabled. Enable it by compiling without the DISABLE_GPU_ERROR_CHECKING macro." +#endif //! DISABLE_GPU_ERROR_CHECKING + + // Check that PRECISION is 2 +#ifndef PRECISION + #error "The PRECISION macro is required" +#endif //! PRECISION + static_assert(PRECISION == 2, "PRECISION must be 2. 
Single precision is not currently supported"); + +// MHD Checks +// ========== +#ifdef MHD + assert(P.nx > 1 and P.ny > 1 and P.nz > 1 and "MHD runs must be 3D"); + + // Must use the correct integrator + #if !defined(VL) || defined(SIMPLE) || defined(CTU) + #error "MHD only supports the Van Leer integrator" + #endif //! VL or SIMPLE + + // must only use HLLD + #if !defined(HLLD) || defined(EXACT) || defined(ROE) || defined(HLL) || defined(HLLC) + #error "MHD only supports the HLLD Riemann Solver" + #endif //! HLLD or EXACT or ROE or HLL or HLLC + + // May only use certain reconstructions + #if ((defined(PCM) + defined(PLMC) + defined(PPMC)) != 1) || defined(PLMP) || defined(PPMP) + #error "MHD only supports PCM, PLMC, and PPMC reconstruction" + #endif // Reconstruction check + + // must have HDF5 + #if defined(OUTPUT) and (not defined(HDF5)) + #error "MHD only supports HDF5 output" + #endif //! HDF5 + + // Warn that diode boundaries are disabled + if (P.xl_bcnd == 3 or P.xu_bcnd == 3 or P.yl_bcnd == 3 or P.yu_bcnd == 3 or P.zl_bcnd == 3 or P.zu_bcnd == 3) { + std::cerr << "Warning: The diode on the outflow boundaries is disabled for MHD" << std::endl; + } + + // Error if unsupported boundary condition is used + assert(P.xl_bcnd != 2 or P.xu_bcnd != 2 or P.yl_bcnd != 2 or P.yu_bcnd != 2 or P.zl_bcnd != 2 or + P.zu_bcnd != 2 && "MHD does not support reflective boundary conditions"); + + // AVERAGE_SLOW_CELLS not supported on MHD + #ifdef AVERAGE_SLOW_CELLS + #error "MHD does not support AVERAGE_SLOW_CELLS" + #endif // AVERAGE_SLOW_CELLS + +#endif // MHD +} + +// NOLINTNEXTLINE(cert-dcl50-cpp) +[[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...) 
+{ + // considerations when using MPI: + // - all processes must execute this function to catch errors that happen on + // just one process + // - to handle cases where all processes encounter the same error, we + // pre-buffer the error message (so that the output remains legible) + + // since we are aborting, it's OK that this isn't the most optimized + + // prepare some info for the error message header + const char* sanitized_func_name = (func_name == nullptr) ? "{unspecified}" : func_name; + +#ifdef MPI_CHOLLA + std::string proc_info = std::to_string(procID) + " / " + std::to_string(nproc) + " (using MPI)"; +#else + std::string proc_info = "0 / 1 (NOT using MPI)"; +#endif + + // prepare the formatted message + std::string msg_buf; + if (msg == nullptr) { + msg_buf = "{nullptr encountered instead of error message}"; + } else { + std::va_list args, args_copy; + va_start(args, msg); + va_copy(args_copy, args); + + // The clang-analyzer-valist.Uninitialized is bugged and triggers improperly on this line + // NOLINTNEXTLINE(clang-analyzer-valist.Uninitialized) + std::size_t bufsize_without_terminator = std::vsnprintf(nullptr, 0, msg, args); + va_end(args); + + // NOTE: starting in C++17 it's possible to mutate msg_buf by mutating msg_buf.data() + + // we initialize a msg_buf with size == bufsize_without_terminator (filled with ' ' chars) + // - msg_buf.data() returns a ptr with msg_buf.size() + 1 characters. We are allowed to + // mutate any of the first msg_buf.size() characters. The entry at + // msg_buf.data()[msg_buf.size()] is initially '\0' (& it MUST remain equal to '\0') + // - the 2nd argument of std::vsnprintf is the size of the output buffer. 
We NEED to + // include the terminator character in this argument, otherwise the formatted message + // will be truncated + msg_buf = std::string(bufsize_without_terminator, ' '); + std::vsnprintf(msg_buf.data(), bufsize_without_terminator + 1, msg, args_copy); + va_end(args_copy); + } + + // now write the error and exit + std::fprintf(stderr, + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n" + "Error occurred in %s on line %d\n" + "Function: %s\n" + "Rank: %s\n" + "Message: %s\n" + "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n", + file_name, line_num, sanitized_func_name, proc_info.data(), msg_buf.data()); + std::fflush(stderr); // may be unnecessary for stderr + chexit(1); +} \ No newline at end of file diff --git a/src/utils/error_handling.h b/src/utils/error_handling.h index 174c083b6..8ce08e2ca 100644 --- a/src/utils/error_handling.h +++ b/src/utils/error_handling.h @@ -1,5 +1,71 @@ #ifndef ERROR_HANDLING_CHOLLA_H #define ERROR_HANDLING_CHOLLA_H #include -void chexit(int code); + +#include "../global/global.h" +[[noreturn]] void chexit(int code); + +/*! + * \brief Check that the Cholla configuration and parameters don't have any significant errors. Mostly compile time + * checks. + * + */ +void Check_Configuration(Parameters const& P); + +/*! + * \brief helper function that prints an error message & aborts the program (in + * an MPI-safe way). Commonly invoked through a macro. + * + */ +[[noreturn]] void Abort_With_Err_(const char* func_name, const char* file_name, int line_num, const char* msg, ...); + +/* __CHOLLA_PRETTY_FUNC__ is a magic constant like __LINE__ or __FILE__ that + * provides the name of the current function. + * - The C++11 standard requires that __func__ is provided on all platforms, but + * that only provides limited information (just the name of the function). + * - Where available, we prefer to use compiler-specific features that provide + * more information about the function (like the scope of the function & the + * the function signature). 
+ */ +#ifdef __GNUG__ + #define __CHOLLA_PRETTY_FUNC__ __PRETTY_FUNCTION__ +#else + #define __CHOLLA_PRETTY_FUNC__ __func__ +#endif + +/*! + * \brief print an error-message (with printf formatting) & abort the program. + * + * This macro should be treated as a function with the signature: + * [[noreturn]] void CHOLLA_ERROR(const char* msg, ...); + * + * - The 1st arg is a printf-style format argument specifying the error message + * - The remaining arguments are used to format the error message + * + * \note + * the ``msg`` string is part of the variadic args so that there is always + * at least 1 variadic argument (even in cases when ``msg`` doesn't format + * any arguments). There is no way around this until C++ 20. + */ +#define CHOLLA_ERROR(...) Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__) + +/*! + * \brief if the condition is false, print an error-message (with printf + * formatting) & abort the program. + * + * This macro should be treated as a function with the signature: + * void CHOLLA_ASSERT(bool cond, const char* msg, ...); + * (it only aborts, via Abort_With_Err_, when ``cond`` is false) + * + * - The 1st arg is a boolean condition. When true, this does nothing + * - The 2nd arg is a printf-style format argument specifying the error message + * - The remaining arguments are used to format the error message + * + * \note + * the behavior is independent of the ``NDEBUG`` macro + */ +#define CHOLLA_ASSERT(cond, ...) 
\ + if (not(cond)) { \ + Abort_With_Err_(__CHOLLA_PRETTY_FUNC__, __FILE__, __LINE__, __VA_ARGS__); \ + } + #endif /*ERROR_HANDLING_CHOLLA_H*/ diff --git a/src/utils/gpu.hpp b/src/utils/gpu.hpp index f68172768..0817940cc 100644 --- a/src/utils/gpu.hpp +++ b/src/utils/gpu.hpp @@ -3,437 +3,470 @@ #include #include #include +#include +#include + +#include "../utils/error_handling.h" #ifdef O_HIP -#include + #include + + #if defined(PARIS) || defined(PARIS_GALACTIC) + + #include + + #endif // CUFFT PARIS PARIS_GALACTIC + + #define WARPSIZE 64 +static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; + + #define CUFFT_D2Z HIPFFT_D2Z + #define CUFFT_FORWARD HIPFFT_FORWARD + #define CUFFT_INVERSE HIPFFT_BACKWARD + #define CUFFT_Z2D HIPFFT_Z2D + #define CUFFT_Z2Z HIPFFT_Z2Z + #define CUFFT_SUCCESS HIPFFT_SUCCESS + #define cufftResult_t hipfftResult_t + + #define cudaDeviceSynchronize hipDeviceSynchronize + #define cudaError hipError_t + #define cudaError_t hipError_t + #define cudaErrorInsufficientDriver hipErrorInsufficientDriver + #define cudaErrorNoDevice hipErrorNoDevice + #define cudaEvent_t hipEvent_t + #define cudaEventCreate hipEventCreate + #define cudaEventElapsedTime hipEventElapsedTime + #define cudaEventRecord hipEventRecord + #define cudaEventSynchronize hipEventSynchronize + #define cudaFree hipFree + #define cudaFreeHost hipHostFree + #define cudaGetDevice hipGetDevice + #define cudaGetDeviceCount hipGetDeviceCount + #define cudaGetErrorString hipGetErrorString + #define cudaGetLastError hipGetLastError + #define cudaHostAlloc hipHostMalloc + #define cudaHostAllocDefault hipHostMallocDefault + #define cudaMalloc hipMalloc + #define cudaMemcpy hipMemcpy + #define cudaMemcpyAsync hipMemcpyAsync + #define cudaMemcpyPeer hipMemcpyPeer + #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost + #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice + #define cudaMemcpyHostToDevice hipMemcpyHostToDevice + #define cudaMemGetInfo hipMemGetInfo + #define cudaMemset 
hipMemset + #define cudaReadModeElementType hipReadModeElementType + #define cudaSetDevice hipSetDevice + #define cudaSuccess hipSuccess + #define cudaDeviceProp hipDeviceProp_t + #define cudaGetDeviceProperties hipGetDeviceProperties + #define cudaPointerAttributes hipPointerAttribute_t + #define cudaPointerGetAttributes hipPointerGetAttributes + #define cudaOccupancyMaxPotentialBlockSize hipOccupancyMaxPotentialBlockSize + #define cudaMemGetInfo hipMemGetInfo + #define cudaDeviceGetPCIBusId hipDeviceGetPCIBusId + #define cudaPeekAtLastError hipPeekAtLastError + + // Texture definitions + #define cudaArray hipArray + #define cudaMallocArray hipMallocArray + #define cudaFreeArray hipFreeArray + #define cudaMemcpyToArray hipMemcpyToArray + #define cudaMemcpy2DToArray hipMemcpy2DToArray + + #define cudaTextureObject_t hipTextureObject_t + #define cudaCreateTextureObject hipCreateTextureObject + #define cudaDestroyTextureObject hipDestroyTextureObject + + #define cudaChannelFormatDesc hipChannelFormatDesc + #define cudaCreateChannelDesc hipCreateChannelDesc + #define cudaChannelFormatKindFloat hipChannelFormatKindFloat + + #define cudaResourceDesc hipResourceDesc + #define cudaResourceTypeArray hipResourceTypeArray + #define cudaTextureDesc hipTextureDesc + #define cudaAddressModeClamp hipAddressModeClamp + #define cudaFilterModeLinear hipFilterModeLinear + #define cudaFilterModePoint hipFilterModePoint + // Texture Definitions + #define cudaPointerAttributes hipPointerAttribute_t + #define cudaPointerGetAttributes hipPointerGetAttributes + + // FFT definitions + #define cufftDestroy hipfftDestroy + #define cufftDoubleComplex hipfftDoubleComplex + #define cufftDoubleReal hipfftDoubleReal + #define cufftExecD2Z hipfftExecD2Z + #define cufftExecZ2D hipfftExecZ2D + #define cufftExecZ2Z hipfftExecZ2Z + #define cufftHandle hipfftHandle + #define cufftPlan3d hipfftPlan3d + #define cufftPlanMany hipfftPlanMany + + #define curandStateMRG32k3a_t hiprandStateMRG32k3a_t + 
#define curand_init hiprand_init + #define curand hiprand + #define curand_poisson hiprand_poisson -#if defined(PARIS) || defined(PARIS_GALACTIC) +#else // not O_HIP -#include + #include -static void __attribute__((unused)) check(const hipfftResult err, const char *const file, const int line) -{ - if (err == HIPFFT_SUCCESS) return; - fprintf(stderr,"HIPFFT ERROR AT LINE %d OF FILE '%s': %d\n",line,file,err); - fflush(stderr); - exit(err); -} + #if defined(PARIS) || defined(PARIS_GALACTIC) -#endif // PARIS PARIC_GALACTIC - -#define WARPSIZE 64 -static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; - -#define CUFFT_D2Z HIPFFT_D2Z -#define CUFFT_FORWARD HIPFFT_FORWARD -#define CUFFT_INVERSE HIPFFT_BACKWARD -#define CUFFT_Z2D HIPFFT_Z2D -#define CUFFT_Z2Z HIPFFT_Z2Z - -#define cudaDeviceSynchronize hipDeviceSynchronize -#define cudaError hipError_t -#define cudaError_t hipError_t -#define cudaErrorInsufficientDriver hipErrorInsufficientDriver -#define cudaErrorNoDevice hipErrorNoDevice -#define cudaEvent_t hipEvent_t -#define cudaEventCreate hipEventCreate -#define cudaEventElapsedTime hipEventElapsedTime -#define cudaEventRecord hipEventRecord -#define cudaEventSynchronize hipEventSynchronize -#define cudaFree hipFree -#define cudaFreeHost hipHostFree -#define cudaGetDevice hipGetDevice -#define cudaGetDeviceCount hipGetDeviceCount -#define cudaGetErrorString hipGetErrorString -#define cudaGetLastError hipGetLastError -#define cudaHostAlloc hipHostMalloc -#define cudaHostAllocDefault hipHostMallocDefault -#define cudaMalloc hipMalloc -#define cudaMemcpy hipMemcpy -#define cudaMemcpyAsync hipMemcpyAsync -#define cudaMemcpyPeer hipMemcpyPeer -#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost -#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice -#define cudaMemcpyHostToDevice hipMemcpyHostToDevice -#define cudaMemGetInfo hipMemGetInfo -#define cudaMemset hipMemset -#define cudaReadModeElementType hipReadModeElementType -#define cudaSetDevice hipSetDevice 
-#define cudaSuccess hipSuccess -#define cudaDeviceProp hipDeviceProp_t -#define cudaGetDeviceProperties hipGetDeviceProperties -#define cudaPointerAttributes hipPointerAttribute_t -#define cudaPointerGetAttributes hipPointerGetAttributes - -// Texture definitions -#define cudaArray hipArray -#define cudaMallocArray hipMallocArray -#define cudaFreeArray hipFreeArray -#define cudaMemcpyToArray hipMemcpyToArray -#define cudaMemcpy2DToArray hipMemcpy2DToArray - - -#define cudaTextureObject_t hipTextureObject_t -#define cudaCreateTextureObject hipCreateTextureObject -#define cudaDestroyTextureObject hipDestroyTextureObject - -#define cudaChannelFormatDesc hipChannelFormatDesc -#define cudaCreateChannelDesc hipCreateChannelDesc -#define cudaChannelFormatKindFloat hipChannelFormatKindFloat - -#define cudaResourceDesc hipResourceDesc -#define cudaResourceTypeArray hipResourceTypeArray -#define cudaTextureDesc hipTextureDesc -#define cudaAddressModeClamp hipAddressModeClamp -#define cudaFilterModeLinear hipFilterModeLinear -#define cudaFilterModePoint hipFilterModePoint -// Texture Definitions - -// FFT definitions -#define cufftDestroy hipfftDestroy -#define cufftDoubleComplex hipfftDoubleComplex -#define cufftDoubleReal hipfftDoubleReal -#define cufftExecD2Z hipfftExecD2Z -#define cufftExecZ2D hipfftExecZ2D -#define cufftExecZ2Z hipfftExecZ2Z -#define cufftHandle hipfftHandle -#define cufftPlan3d hipfftPlan3d -#define cufftPlanMany hipfftPlanMany - -static void __attribute__((unused)) check(const hipError_t err, const char *const file, const int line) -{ - if (err == hipSuccess) return; - fprintf(stderr,"HIP ERROR AT LINE %d OF FILE '%s': %s %s\n",line,file,hipGetErrorName(err),hipGetErrorString(err)); - fflush(stderr); - exit(err); -} + #include -#else // not O_HIP + #endif // defined(PARIS) || defined(PARIS_GALACTIC) -#include + #define WARPSIZE 32 +static constexpr int maxWarpsPerBlock = 1024 / WARPSIZE; + #define hipLaunchKernelGGL(F, G, B, M, S, ...) 
F<<>>(__VA_ARGS__) + #define __shfl_down(...) __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__) -#if defined(PARIS) || defined(PARIS_GALACTIC) +#endif // O_HIP -#include +#define GPU_MAX_THREADS 256 -static void check(const cufftResult err, const char *const file, const int line) +/*! + * \brief Check for CUDA/HIP error codes. Can be called wrapping a GPU function that returns a value or with no + * arguments and it will get the latest error code. + * + * \param[in] code The code to check. Defaults to the last error code + * \param[in] abort Whether or not to abort if an error is encountered. Defaults to True + * \param[in] location The location of the call. This should be left as the default value. + */ +inline void GPU_Error_Check(cudaError_t code = cudaPeekAtLastError(), bool abort = true, + std::experimental::source_location location = std::experimental::source_location::current()) { - if (err == CUFFT_SUCCESS) return; - fprintf(stderr,"CUFFT ERROR AT LINE %d OF FILE '%s': %d\n",line,file,err); - fflush(stderr); - exit(err); +#ifndef DISABLE_GPU_ERROR_CHECKING + code = cudaDeviceSynchronize(); + + // Check the code + if (code != cudaSuccess) { + std::cout << "GPU_Error_Check: Failed at " + << "Line: " << location.line() << ", File: " << location.file_name() + << ", Function: " << location.function_name() << ", with code: " << cudaGetErrorString(code) << std::endl; + if (abort) { + chexit(code); + } + } +#endif // DISABLE_GPU_ERROR_CHECKING } -#endif // defined(PARIS) || defined(PARIS_GALACTIC) - -static void check(const cudaError_t err, const char *const file, const int line) +#if defined(PARIS) || defined(PARIS_GALACTIC) +/*! + * \brief Check for CUFFT/HIPFFT error codes. Can be called wrapping a FFT function that returns a value + * + * \param[in] code The code to check + * \param[in] abort Whether or not to abort if an error is encountered. Defaults to True + * \param[in] location The location of the call. This should be left as the default value. 
+ */ +inline void GPU_Error_Check(cufftResult_t code, bool abort = true, + std::experimental::source_location location = std::experimental::source_location::current()) { - if (err == cudaSuccess) return; - fprintf(stderr,"CUDA ERROR AT LINE %d OF FILE '%s': %s %s\n",line,file,cudaGetErrorName(err),cudaGetErrorString(err)); - fflush(stderr); - exit(err); + #ifndef DISABLE_GPU_ERROR_CHECKING + // Check the code + if (code != CUFFT_SUCCESS) { + std::cout << "GPU_Error_Check: Failed at " + << "Line: " << location.line() << ", File: " << location.file_name() + << ", Function: " << location.function_name() << ", with FFT code: " << code << std::endl; + if (abort) { + chexit(code); + } + } + #endif // DISABLE_GPU_ERROR_CHECKING } - -#define WARPSIZE 32 -static constexpr int maxWarpsPerBlock = 1024/WARPSIZE; -#define hipLaunchKernelGGL(F,G,B,M,S,...) F<<>>(__VA_ARGS__) -#define __shfl_down(...) __shfl_down_sync(0xFFFFFFFF, __VA_ARGS__) - -#endif //O_HIP - -#define CHECK(X) check(X,__FILE__,__LINE__) - -#define GPU_MAX_THREADS 256 +#endif // defined(PARIS) || defined(PARIS_GALACTIC) #if defined(__CUDACC__) || defined(__HIPCC__) template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun0(const int n0, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0(const int n0, const F f) { - const int i0 = blockIdx.x*blockDim.x+threadIdx.x; - if (i0 < n0) f(i0); + const int i0 = blockIdx.x * blockDim.x + threadIdx.x; + if (i0 < n0) { + f(i0); + } } template void gpuFor(const int n0, const F f) { - if (n0 <= 0) return; - const int b0 = (n0+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t0 = (n0+b0-1)/b0; - gpuRun0<<>>(n0,f); - CHECK(cudaGetLastError()); + if (n0 <= 0) { + return; + } + const int b0 = (n0 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t0 = (n0 + b0 - 1) / b0; + gpuRun0<<>>(n0, f); + GPU_Error_Check(); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun0x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) 
void gpuRun0x2(const F f) { const int i0 = threadIdx.y; const int i1 = threadIdx.x; - f(i0,i1); + f(i0, i1); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun1x1(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x1(const F f) { const int i0 = blockIdx.x; const int i1 = threadIdx.x; - f(i0,i1); + f(i0, i1); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x0(const int n1, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x0(const int n1, const F f) { const int i0 = blockIdx.y; - const int i1 = blockIdx.x*blockDim.x+threadIdx.x; - if (i1 < n1) f(i0,i1); + const int i1 = blockIdx.x * blockDim.x + threadIdx.x; + if (i1 < n1) { + f(i0, i1); + } } template void gpuFor(const int n0, const int n1, const F f) { - if ((n0 <= 0) || (n1 <= 0)) return; - const long nl01 = long(n0)*long(n1); + if ((n0 <= 0) || (n1 <= 0)) { + return; + } + const long nl01 = long(n0) * long(n1); assert(nl01 < long(INT_MAX)); if (n1 > GPU_MAX_THREADS) { - const int b1 = (n1+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t1 = (n1+b1-1)/b1; - gpuRun2x0<<>>(n1,f); - CHECK(cudaGetLastError()); + const int b1 = (n1 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t1 = (n1 + b1 - 1) / b1; + gpuRun2x0<<>>(n1, f); + GPU_Error_Check(); } else if (nl01 > GPU_MAX_THREADS) { - gpuRun1x1<<>>(f); - CHECK(cudaGetLastError()); + gpuRun1x1<<>>(f); + GPU_Error_Check(); } else { - gpuRun0x2<<<1,dim3(n1,n0)>>>(f); - CHECK(cudaGetLastError()); + gpuRun0x2<<<1, dim3(n1, n0)>>>(f); + GPU_Error_Check(); } } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun0x3(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun0x3(const F f) { const int i0 = threadIdx.z; const int i1 = threadIdx.y; const int i2 = threadIdx.x; - f(i0,i1,i2); + f(i0, i1, i2); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun1x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun1x2(const F f) { 
const int i0 = blockIdx.x; const int i1 = threadIdx.y; const int i2 = threadIdx.x; - f(i0,i1,i2); + f(i0, i1, i2); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x1(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x1(const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x; const int i2 = threadIdx.x; - f(i0,i1,i2); + f(i0, i1, i2); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun3x0(const int n2, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x0(const int n2, const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; - const int i2 = blockIdx.x*blockDim.x+threadIdx.x; - if (i2 < n2) f(i0,i1,i2); + const int i2 = blockIdx.x * blockDim.x + threadIdx.x; + if (i2 < n2) { + f(i0, i1, i2); + } } template void gpuFor(const int n0, const int n1, const int n2, const F f) { - if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) return; - const long nl12 = long(n1)*long(n2); - const long nl012 = long(n0)*nl12; + if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0)) { + return; + } + const long nl12 = long(n1) * long(n2); + const long nl012 = long(n0) * nl12; assert(nl012 < long(INT_MAX)); if (n2 > GPU_MAX_THREADS) { - const int b2 = (n2+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t2 = (n2+b2-1)/b2; - gpuRun3x0<<>>(n2,f); - CHECK(cudaGetLastError()); + const int b2 = (n2 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t2 = (n2 + b2 - 1) / b2; + gpuRun3x0<<>>(n2, f); + GPU_Error_Check(); } else if (nl12 > GPU_MAX_THREADS) { - gpuRun2x1<<>>(f); - CHECK(cudaGetLastError()); + gpuRun2x1<<>>(f); + GPU_Error_Check(); } else if (nl012 > GPU_MAX_THREADS) { - gpuRun1x2<<>>(f); - CHECK(cudaGetLastError()); + gpuRun1x2<<>>(f); + GPU_Error_Check(); } else { - gpuRun0x3<<<1,dim3(n2,n1,n0)>>>(f); - CHECK(cudaGetLastError()); + gpuRun0x3<<<1, dim3(n2, n1, n0)>>>(f); + GPU_Error_Check(); } } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun1x3(const F f) +__global__ 
__launch_bounds__(GPU_MAX_THREADS) void gpuRun1x3(const F f) { const int i0 = blockIdx.x; const int i1 = threadIdx.z; const int i2 = threadIdx.y; const int i3 = threadIdx.x; - f(i0,i1,i2,i3); + f(i0, i1, i2, i3); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x2(const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x; const int i2 = threadIdx.y; const int i3 = threadIdx.x; - f(i0,i1,i2,i3); + f(i0, i1, i2, i3); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun3x1(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x1(const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; const int i2 = blockIdx.x; const int i3 = threadIdx.x; - f(i0,i1,i2,i3); + f(i0, i1, i2, i3); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun4x0(const int n23, const int n3, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x0(const int n23, const int n3, const F f) { - const int i23 = blockIdx.x*blockDim.x+threadIdx.x; + const int i23 = blockIdx.x * blockDim.x + threadIdx.x; if (i23 < n23) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; - const int i2 = i23/n3; - const int i3 = i23%n3; - f(i0,i1,i2,i3); + const int i2 = i23 / n3; + const int i3 = i23 % n3; + f(i0, i1, i2, i3); } } template void gpuFor(const int n0, const int n1, const int n2, const int n3, const F f) { - if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) return; - const long nl23 = long(n2)*long(n3); - const long nl123 = long(n1)*nl23; - assert(long(n0)*nl123 < long(INT_MAX)); + if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0)) { + return; + } + const long n23_long = long(n2) * long(n3); + const long n123_long = long(n1) * n23_long; + assert(long(n0) * n123_long < long(INT_MAX)); - const int n23 = int(nl23); - const int n123 = int(nl123); + const int n23 = int(n23_long); + const int n123 = int(n123_long); if (n3 > 
GPU_MAX_THREADS) { - const int b23 = (n23+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t23 = (n23+b23-1)/b23; - gpuRun4x0<<>>(n23,n3,f); - CHECK(cudaGetLastError()); + const int b23 = (n23 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t23 = (n23 + b23 - 1) / b23; + gpuRun4x0<<>>(n23, n3, f); + GPU_Error_Check(); } else if (n23 > GPU_MAX_THREADS) { - gpuRun3x1<<>>(f); - CHECK(cudaGetLastError()); + gpuRun3x1<<>>(f); + GPU_Error_Check(); } else if (n123 > GPU_MAX_THREADS) { - gpuRun2x2<<>>(f); - CHECK(cudaGetLastError()); + gpuRun2x2<<>>(f); + GPU_Error_Check(); } else { - gpuRun1x3<<>>(f); - CHECK(cudaGetLastError()); + gpuRun1x3<<>>(f); + GPU_Error_Check(); } } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun2x3(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun2x3(const F f) { const int i0 = blockIdx.y; const int i1 = blockIdx.x; const int i2 = threadIdx.z; const int i3 = threadIdx.y; const int i4 = threadIdx.x; - f(i0,i1,i2,i3,i4); + f(i0, i1, i2, i3, i4); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun3x2(const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun3x2(const F f) { const int i0 = blockIdx.z; const int i1 = blockIdx.y; const int i2 = blockIdx.x; const int i3 = threadIdx.y; const int i4 = threadIdx.x; - f(i0,i1,i2,i3,i4); + f(i0, i1, i2, i3, i4); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun4x1(const int n1, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun4x1(const int n1, const F f) { const int i01 = blockIdx.z; - const int i0 = i01/n1; - const int i1 = i01%n1; - const int i2 = blockIdx.y; - const int i3 = blockIdx.x; - const int i4 = threadIdx.x; - f(i0,i1,i2,i3,i4); + const int i0 = i01 / n1; + const int i1 = i01 % n1; + const int i2 = blockIdx.y; + const int i3 = blockIdx.x; + const int i4 = threadIdx.x; + f(i0, i1, i2, i3, i4); } template -__global__ __launch_bounds__(GPU_MAX_THREADS) -void gpuRun5x0(const int n1, 
const int n34, const int n4, const F f) +__global__ __launch_bounds__(GPU_MAX_THREADS) void gpuRun5x0(const int n1, const int n34, const int n4, const F f) { - const int i34 = blockIdx.x*blockDim.x+threadIdx.x; + const int i34 = blockIdx.x * blockDim.x + threadIdx.x; if (i34 < n34) { const int i01 = blockIdx.z; - const int i0 = i01/n1; - const int i1 = i01%n1; - const int i2 = blockIdx.y; - const int i3 = i34/n4; - const int i4 = i34%n4; - f(i0,i1,i2,i3,i4); + const int i0 = i01 / n1; + const int i1 = i01 % n1; + const int i2 = blockIdx.y; + const int i3 = i34 / n4; + const int i4 = i34 % n4; + f(i0, i1, i2, i3, i4); } } template void gpuFor(const int n0, const int n1, const int n2, const int n3, const int n4, const F f) { - if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) return; - const long nl01 = long(n0)*long(n1); - const long nl34 = long(n3)*long(n4); - assert(nl01*long(n2)*nl34 < long(INT_MAX)); + if ((n0 <= 0) || (n1 <= 0) || (n2 <= 0) || (n3 <= 0) || (n4 <= 0)) { + return; + } + const long nl01 = long(n0) * long(n1); + const long nl34 = long(n3) * long(n4); + assert(nl01 * long(n2) * nl34 < long(INT_MAX)); const int n34 = int(nl34); if (n4 > GPU_MAX_THREADS) { const int n01 = int(nl01); - const int b34 = (n34+GPU_MAX_THREADS-1)/GPU_MAX_THREADS; - const int t34 = (n34+b34-1)/b34; - gpuRun5x0<<>>(n1,n34,n4,f); - CHECK(cudaGetLastError()); + const int b34 = (n34 + GPU_MAX_THREADS - 1) / GPU_MAX_THREADS; + const int t34 = (n34 + b34 - 1) / b34; + gpuRun5x0<<>>(n1, n34, n4, f); + GPU_Error_Check(); } else if (n34 > GPU_MAX_THREADS) { - const int n01 = n0*n1; - gpuRun4x1<<>>(n1,f); - CHECK(cudaGetLastError()); - } else if (n2*n34 > GPU_MAX_THREADS) { - gpuRun3x2<<>>(f); - CHECK(cudaGetLastError()); + const int n01 = n0 * n1; + gpuRun4x1<<>>(n1, f); + GPU_Error_Check(); + } else if (n2 * n34 > GPU_MAX_THREADS) { + gpuRun3x2<<>>(f); + GPU_Error_Check(); } else { - gpuRun2x3<<>>(f); - CHECK(cudaGetLastError()); + gpuRun2x3<<>>(f); + 
GPU_Error_Check(); } } -#define GPU_LAMBDA [=] __device__ + #define GPU_LAMBDA [=] __device__ #endif diff --git a/src/utils/gpu_arrays_functions.cu b/src/utils/gpu_arrays_functions.cu index 2111f0907..0a84ef64e 100644 --- a/src/utils/gpu_arrays_functions.cu +++ b/src/utils/gpu_arrays_functions.cu @@ -1,75 +1,62 @@ +#include + +#include "../global/global_cuda.h" #include "../utils/error_handling.h" #include "../utils/gpu.hpp" -#include "../global/global_cuda.h" #include "../utils/gpu_arrays_functions.h" -#include +void Extend_GPU_Array_Real(Real **current_array_d, int current_size, int new_size, bool print_out) +{ + if (new_size <= current_size) { + return; + } + if (print_out) { + std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + } -void Extend_GPU_Array_Real( Real **current_array_d, int current_size, int new_size, bool print_out ){ - - if ( new_size <= current_size ) return; - if ( print_out ) std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; - size_t global_free, global_total; - CudaSafeCall( cudaMemGetInfo( &global_free, &global_total ) ); + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); cudaDeviceSynchronize(); - #ifdef PRINT_GPU_MEMORY - printf( "ReAllocating GPU Memory: %d MB free \n", (int) global_free/1000000); - #endif - - if ( global_free < new_size*sizeof(Real) ){ - printf( "ERROR: Not enough global device memory \n" ); - printf( " Available Memory: %d MB \n", (int) (global_free/1000000) ); - printf( " Requested Memory: %d MB \n", (int) (new_size*sizeof(Real)/1000000) ); +#ifdef PRINT_GPU_MEMORY + printf("ReAllocating GPU Memory: %d MB free \n", (int)global_free / 1000000); +#endif + + if (global_free < new_size * sizeof(Real)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %d MB \n", (int)(global_free / 1000000)); + printf(" Requested Memory: %d MB \n", (int)(new_size * sizeof(Real) / 
1000000)); // exit(-1); } - + Real *new_array_d; - CudaSafeCall( cudaMalloc((void**)&new_array_d, new_size*sizeof(Real)) ); + GPU_Error_Check(cudaMalloc((void **)&new_array_d, new_size * sizeof(Real))); cudaDeviceSynchronize(); - CudaCheckError(); - if ( new_array_d == NULL ){ + GPU_Error_Check(); + if (new_array_d == NULL) { std::cout << " Error When Allocating New GPU Array" << std::endl; chexit(-1); } - + // Copy the content of the original array to the new array - CudaSafeCall( cudaMemcpy( new_array_d, *current_array_d, current_size*sizeof(Real), cudaMemcpyDeviceToDevice ) ); + GPU_Error_Check(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(Real), cudaMemcpyDeviceToDevice)); cudaDeviceSynchronize(); - CudaCheckError(); - + GPU_Error_Check(); + // size_t global_free_before, global_free_after; - // CudaSafeCall( cudaMemGetInfo( &global_free_before, &global_total ) ); + // GPU_Error_Check( cudaMemGetInfo( &global_free_before, &global_total ) ); // cudaDeviceSynchronize(); - + // Free the original array cudaFree(*current_array_d); cudaDeviceSynchronize(); - CudaCheckError(); - - // CudaSafeCall( cudaMemGetInfo( &global_free_after, &global_total ) ); + GPU_Error_Check(); + + // GPU_Error_Check( cudaMemGetInfo( &global_free_after, &global_total ) ); // cudaDeviceSynchronize(); - // - // printf("Freed Memory: %d MB\n", (int) (global_free_after - global_free_before)/1000000 ); - + // + // printf("Freed Memory: %d MB\n", (int) (global_free_after - + // global_free_before)/1000000 ); + // Replace the pointer of the original array with the new one *current_array_d = new_array_d; - } - - - - - - - - - - - - - - - - - diff --git a/src/utils/gpu_arrays_functions.h b/src/utils/gpu_arrays_functions.h index aae5fa2dc..f15b379ab 100644 --- a/src/utils/gpu_arrays_functions.h +++ b/src/utils/gpu_arrays_functions.h @@ -1,10 +1,58 @@ #ifndef GPU_ARRAY_FUNCTIONS_H #define GPU_ARRAY_FUNCTIONS_H -#include "../global/global.h" +#include -void Extend_GPU_Array_Real( Real 
**current_array_d, int current_size, int new_size, bool print_out ); +#include "../global/global_cuda.h" +#include "../utils/error_handling.h" +#include "../utils/gpu.hpp" +#include "../utils/gpu_arrays_functions.h" +template +void Extend_GPU_Array(T **current_array_d, int current_size, int new_size, bool print_out) +{ + if (new_size <= current_size) { + return; + } + if (print_out) { + std::cout << " Extending GPU Array, size: " << current_size << " new_size: " << new_size << std::endl; + } + size_t global_free, global_total; + GPU_Error_Check(cudaMemGetInfo(&global_free, &global_total)); + cudaDeviceSynchronize(); +#ifdef PRINT_GPU_MEMORY + printf("ReAllocating GPU Memory: %ld MB free \n", global_free / 1000000); +#endif -#endif \ No newline at end of file + if (global_free < new_size * sizeof(T)) { + printf("ERROR: Not enough global device memory \n"); + printf(" Available Memory: %ld MB \n", global_free / 1000000); + printf(" Requested Memory: %ld MB \n", new_size * sizeof(T) / 1000000); + exit(-1); + } + + T *new_array_d; + GPU_Error_Check(cudaMalloc((void **)&new_array_d, new_size * sizeof(T))); + cudaDeviceSynchronize(); + GPU_Error_Check(); + if (new_array_d == NULL) { + std::cout << " Error When Allocating New GPU Array" << std::endl; + chexit(-1); + } + + // Copy the content of the original array to the new array + GPU_Error_Check(cudaMemcpy(new_array_d, *current_array_d, current_size * sizeof(T), cudaMemcpyDeviceToDevice)); + cudaDeviceSynchronize(); + GPU_Error_Check(); + + // Free the original array + cudaFree(*current_array_d); + cudaDeviceSynchronize(); + GPU_Error_Check(); + + // Replace the pointer of the original array with the new one + *current_array_d = new_array_d; +} + +#endif diff --git a/src/utils/hydro_utilities.cpp b/src/utils/hydro_utilities.cpp index 7fa7c1894..bc649c75c 100644 --- a/src/utils/hydro_utilities.cpp +++ b/src/utils/hydro_utilities.cpp @@ -1,5 +1,6 @@ #include "../utils/hydro_utilities.h" -namespace hydro_utilities { 
+namespace hydro_utilities +{ -} // end namespace hydro_utilities \ No newline at end of file +} // end namespace hydro_utilities \ No newline at end of file diff --git a/src/utils/hydro_utilities.h b/src/utils/hydro_utilities.h index 51439ac29..1a464e899 100644 --- a/src/utils/hydro_utilities.h +++ b/src/utils/hydro_utilities.h @@ -14,60 +14,201 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../utils/gpu.hpp" +#include "../utils/math_utilities.h" +#include "../utils/mhd_utilities.h" +/*! + * INDEX OF VARIABLES + * P : pressure + * vx, vy, vz : x, y, and z velocity + * d : density + * E : energy + * T : temperature + * mx, my, mz : x, y, and z momentum + * n : number density + */ + +namespace hydro_utilities +{ + +inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, + Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) +{ + Real pressure = E - 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz); + +#ifdef MHD + pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return fmax((gamma - 1.) * pressure, TINY_NUMBER); +} + +inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, + Real const &mz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) +{ + Real pressure = E - 0.5 * math_utils::SquareMagnitude(mx, my, mz) / d; + +#ifdef MHD + pressure -= mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return fmax((gamma - 1.) * pressure, TINY_NUMBER); +} + +inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) +{ + Real T = P * PRESSURE_UNIT / (n * KB); + return T; +} + +/*! 
+ * \brief Compute the temperature from the conserved variables + * + * \param[in] E The energy + * \param[in] d The density + * \param[in] mx The momentum in the X-direction + * \param[in] my The momentum in the Y-direction + * \param[in] mz The momentum in the Z-direction + * \param[in] gamma The adiabatic index + * \param[in] n The number density + * \param[in] magnetic_x The cell centered magnetic field in the X-direction + * \param[in] magnetic_y The cell centered magnetic field in the Y-direction + * \param[in] magnetic_z The cell centered magnetic field in the Z-direction + * \return Real The temperature of the gas in a cell + */ +inline __host__ __device__ Real Calc_Temp_Conserved(Real const E, Real const d, Real const mx, Real const my, + Real const mz, Real const gamma, Real const n, + Real const magnetic_x = 0.0, Real const magnetic_y = 0.0, + Real const magnetic_z = 0.0) +{ + Real const P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma, magnetic_x, magnetic_y, magnetic_z); + return Calc_Temp(P, n); +} + +#ifdef DE +/*! + * \brief Compute the temperature when DE is turned on + * + * \param[in] gas_energy The total gas energy in the cell. 
This is the value stored in the grid at + * grid_enum::GasEnergy + * \param[in] gamma The adiabatic index + * \param[in] n The number density + * \return Real The temperature + */ +inline __host__ __device__ Real Calc_Temp_DE(Real const gas_energy, Real const gamma, Real const n) +{ + return gas_energy * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); +} +#endif // DE + +inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, + Real const &vz, Real const &gamma, Real const &magnetic_x = 0.0, + Real const &magnetic_y = 0.0, Real const &magnetic_z = 0.0) +{ + // Compute and return energy + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz); + +#ifdef MHD + energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return energy; +} + +inline __host__ __device__ Real Calc_Energy_Conserved(Real const &P, Real const &d, Real const &momentum_x, + Real const &momentum_y, Real const &momentum_z, Real const &gamma, + Real const &magnetic_x = 0.0, Real const &magnetic_y = 0.0, + Real const &magnetic_z = 0.0) +{ + // Compute and return energy + Real energy = (fmax(P, TINY_NUMBER) / (gamma - 1.)) + + (0.5 / d) * math_utils::SquareMagnitude(momentum_x, momentum_y, momentum_z); + +#ifdef MHD + energy += mhd::utils::computeMagneticEnergy(magnetic_x, magnetic_y, magnetic_z); +#endif // MHD + + return energy; +} + +inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, + Real const &gamma) +{ + Real U, P; + Real eta = DE_ETA_1; + // Apply same condition as Byan+2013 to select the internal energy from which + // compute pressure. + if (U_total / E > eta) { + U = U_total; + } else { + U = U_advected; + } + P = U * (gamma - 1.0); + return fmax(P, (Real)TINY_NUMBER); + ; +} + +/*! 
+ * \brief Compute the kinetic energy from the density and velocities + * + * \param[in] d The density + * \param[in] vx The x velocity + * \param[in] vy The y velocity + * \param[in] vz The z velocity + * \return Real The kinetic energy + */ +inline __host__ __device__ Real Calc_Kinetic_Energy_From_Velocity(Real const &d, Real const &vx, Real const &vy, + Real const &vz) +{ + return 0.5 * d * math_utils::SquareMagnitude(vx, vy, vz); +} + +/*! + * \brief Compute the kinetic energy from the density and momenta + * + * \param[in] d The density + * \param[in] mx The x momentum + * \param[in] my The y momentum + * \param[in] mz The z momentum + * \return Real The kinetic energy + */ +inline __host__ __device__ Real Calc_Kinetic_Energy_From_Momentum(Real const &d, Real const &mx, Real const &my, + Real const &mz) +{ + return (0.5 / d) * math_utils::SquareMagnitude(mx, my, mz); +} + +/*! + * \brief Compute the sound speed in the cell from conserved variables + * + * \param E Energy + * \param d densidy + * \param mx x momentum + * \param my y momentum + * \param mz z momentum + * \param gamma adiabatic index + * \return Real The sound speed + */ +inline __host__ __device__ Real Calc_Sound_Speed(Real const &E, Real const &d, Real const &mx, Real const &my, + Real const &mz, Real const &gamma) +{ + Real P = Calc_Pressure_Conserved(E, d, mx, my, mz, gamma); + return sqrt(gamma * P / d); +} /*! 
-* INDEX OF VARIABLES -* P : pressure -* vx, vy, vz : x, y, and z velocity -* d : density -* E : energy -* T : temperature -* mx, my, mz : x, y, and z momentum -* n : number density -*/ - -namespace hydro_utilities { - inline __host__ __device__ Real Calc_Pressure_Primitive(Real const &E, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { - Real P; - P = (E - 0.5 * d * (vx*vx + vy*vy + vz*vz)) * (gamma - 1.0); - P = fmax(P, TINY_NUMBER); - return P; - } - - inline __host__ __device__ Real Calc_Pressure_Conserved(Real const &E, Real const &d, Real const &mx, Real const &my, Real const &mz, Real const &gamma) { - Real P= (E - 0.5 * (mx*mx + my*my + mz*mz) / d) * (gamma - 1.); - return fmax(P, TINY_NUMBER); - } - - inline __host__ __device__ Real Calc_Temp(Real const &P, Real const &n) { - Real T = P * PRESSURE_UNIT / (n * KB); - return T; - } - - #ifdef DE - inline __host__ __device__ Real Calc_Temp_DE(Real const &d, Real const &ge, Real const &gamma, Real const&n) { - Real T = d * ge * (gamma - 1.0) * PRESSURE_UNIT / (n * KB); - return T; - } - #endif // DE - - inline __host__ __device__ Real Calc_Energy_Primitive(Real const &P, Real const &d, Real const &vx, Real const &vy, Real const &vz, Real const &gamma) { - // Compute and return energy - return (fmax(P, TINY_NUMBER)/(gamma - 1.)) + 0.5 * d * (vx*vx + vy*vy + vz*vz); - } - - inline __host__ __device__ Real Get_Pressure_From_DE(Real const &E, Real const &U_total, Real const &U_advected, Real const &gamma) { - Real U, P; - Real eta = DE_ETA_1; - // Apply same condition as Byan+2013 to select the internal energy from which compute pressure. 
- if (U_total/E > eta) { - U = U_total; - } else { - U = U_advected; - } - P = U * (gamma - 1.0); - return P; - } - -} \ No newline at end of file + * \brief Compute the sound in the cell from primitive variables + * + * \param P + * \param d + * \param gamma + * \return __host__ + */ +inline __host__ __device__ Real Calc_Sound_Speed(Real const &P, Real const &d, Real const &gamma) +{ + return sqrt(gamma * P / d); +} + +} // namespace hydro_utilities diff --git a/src/utils/hydro_utilities_tests.cpp b/src/utils/hydro_utilities_tests.cpp index e8a066d12..fe0cbe9e6 100644 --- a/src/utils/hydro_utilities_tests.cpp +++ b/src/utils/hydro_utilities_tests.cpp @@ -1,129 +1,264 @@ /*! * \file hyo_utilities_tests.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie (helenarichie@pitt.edu) - * \brief Tests for the contents of hydro_utilities.h and hydro_utilities.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu), Helena Richie + * (helenarichie@pitt.edu) \brief Tests for the contents of hydro_utilities.h + * and hydro_utilities.cpp * */ // STL Includes -#include -#include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/hydro_utilities.h" #include "../global/global.h" +#include "../utils/hydro_utilities.h" +#include "../utils/testing_utilities.h" /*! 
-* INDEX OF VARIABLES -* P : pressure -* vx, vy, vz : x, y, and z velocity -* d : density -* E : energy -* T : temperature -* mx, my, mz : x, y, and z momentum -* n : number density -*/ + * INDEX OF VARIABLES + * P : pressure + * vx, vy, vz : x, y, and z velocity + * d : density + * E : energy + * T : temperature + * mx, my, mz : x, y, and z momentum + * n : number density + */ // ============================================================================= // Local helper functions namespace { - struct TestParams - { - double gamma = 5./3.; - std::vector d {1.0087201154e-15, 1.0756968986e2, 1.0882403847e100}; - std::vector vx {1.0378624601e-100, 1.0829278656e2, 1.0800514112e100}; - std::vector vy {1.0583469014e-100, 1.0283073464e2, 1.0725717864e100}; - std::vector vz {1.0182972216e-100, 1.0417748226e2, 1.0855352639e100}; - std::vector mx {0.2340416681e-100, 0.1019429453e2, 0.5062596954e100}; - std::vector my {0.9924582299e-100, 0.1254780684e2, 0.5939640992e100}; - std::vector mz {0.6703192739e-100, 0.5676716066e2, 0.2115881803e100}; - std::vector E {20.9342082433e-90, 20.9976906577e10, 20.9487120853e300}; - std::vector P {2.2244082909e-10, 8.6772951021e2, 6.7261085663e100}; - std::vector n {3.0087201154e-10, 1.3847303413e2, 1.0882403847e100}; - std::vector ge {4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; - std::vector U_total {2.389074039e-10, 4.890374019e2, 6.8731436293e100}; - std::vector U_advected {1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; - std::vector names{"Small number case", "Medium number case", "Large number case"}; - }; +struct TestParams { + double gamma = 5. 
/ 3.; + std::vector d{1.0087201154e-15, 1.0756968986e2, 1.0882403847e100}; + std::vector vx{1.0378624601e-100, 1.0829278656e2, 1.0800514112e100}; + std::vector vy{1.0583469014e-100, 1.0283073464e2, 1.0725717864e100}; + std::vector vz{1.0182972216e-100, 1.0417748226e2, 1.0855352639e100}; + std::vector mx{0.2340416681e-100, 0.1019429453e2, 0.5062596954e100}; + std::vector my{0.9924582299e-100, 0.1254780684e2, 0.5939640992e100}; + std::vector mz{0.6703192739e-100, 0.5676716066e2, 0.2115881803e100}; + std::vector E{20.9342082433e-90, 20.9976906577e10, 20.9487120853e300}; + std::vector P{2.2244082909e-10, 8.6772951021e2, 6.7261085663e100}; + std::vector n{3.0087201154e-10, 1.3847303413e2, 1.0882403847e100}; + std::vector ge{4.890374019e-10, 1.0756968986e2, 3.8740982372e100}; + std::vector U_total{2.389074039e-10, 4.890374019e2, 6.8731436293e100}; + std::vector U_advected{1.3847303413e-10, 1.0756968986e2, 1.0882403847e100}; + std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; + std::vector magnetic_x{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; + std::vector magnetic_y{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; + std::vector magnetic_z{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; + std::vector names{"Small number case", "Medium number case", "Large number case"}; +}; +} // namespace + +TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_pressure{0, 139982878676.5015, 1.2697896247496674e+301}; +#else // not MHD + std::vector fiducial_pressure{1e-20, 139983415580.5549, 1.2697896247496674e+301}; +#endif // MHD + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ps = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), 
parameters.magnetic_z.at(i)); + + testing_utilities::Check_Results(fiducial_pressure.at(i), test_Ps, parameters.names.at(i)); + } } -TEST(tHYDROHydroUtilsCalcPressurePrimitive, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ps {1e-20, 139983415580.5549, 1.2697896247496674e+301}; +TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_pressure{0, 139984067469.81754, 1.3965808056866668e+301}; +#else // not MHD + std::vector fiducial_pressure{1e-20, 139984604373.87094, 1.3965808056866668e+301}; +#endif // MHD - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ps = hydro_utilities::Calc_Pressure_Primitive(parameters.E.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_pressure = hydro_utilities::Calc_Pressure_Conserved( + parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); - } + testing_utilities::Check_Results(fiducial_pressure.at(i), test_pressure, parameters.names.at(i)); + } } -TEST(tHYDROHydroUtilsCalcPressureConserved, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ps {1e-20, 139984604373.87094, 1.3965808056866668e+301}; +TEST(tHYDROtMHDHydroUtilsCalcPressurePrimitive, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ps = hydro_utilities::Calc_Pressure_Conserved(parameters.E.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), parameters.gamma); + for (size_t i = 0; i < parameters.names.size(); i++) 
{ + Real test_pressure = hydro_utilities::Calc_Pressure_Primitive( + parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); - } + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl; + } } -TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ts {3465185.0560059389, 29370603.906644326, 28968949.83344138}; +TEST(tHYDROtMHDHydroUtilsCalcPressureConserved, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); + for (size_t i = 0; i < parameters.names.size() - 1; i++) { + Real test_pressure = hydro_utilities::Calc_Pressure_Conserved( + 1E-10 * parameters.E.at(i), parameters.d.at(i), 1E4 * parameters.mx.at(i), 1E4 * parameters.my.at(i), + 1E4 * parameters.mz.at(i), parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), + parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); - } + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, test_pressure) << "Difference in " << parameters.names.at(i) << std::endl; + } +} + +TEST(tHYDROHydroUtilsCalcTemp, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ts{3465185.0560059389, 29370603.906644326, 28968949.83344138}; + + for (size_t i = 0; i < parameters.names.size(); 
i++) { + Real test_Ts = hydro_utilities::Calc_Temp(parameters.P.at(i), parameters.n.at(i)); + + testing_utilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + } } #ifdef DE -TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ts {5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107}; +TEST(tHYDROHydroUtilsCalcTempDE, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ts{5.123106988008801e-09, 261106139.02514684, 1.2105231166585662e+107}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ts = + hydro_utilities::Calc_Temp_DE(parameters.d.at(i) * parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ts = hydro_utilities::Calc_Temp_DE(parameters.d.at(i), parameters.ge.at(i), parameters.gamma, parameters.n.at(i)); + testing_utilities::Check_Results(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); + } +} +#endif // DE + +TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{3.3366124363499997e-10, 2589863.8420712831, 1.9018677140549926e+300}; +#else // not MHD + std::vector fiducial_energy{3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +#endif // MHD - testingUtilities::checkResults(fiducial_Ts.at(i), test_Ts, parameters.names.at(i)); - } + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } } -#endif // DE -TEST(tHYDROHydroUtilsCalcEnergyPrimitive, 
CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Es {3.3366124363499997e-10, 1784507.7619407175, 1.9018677140549926e+300}; +TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{3.3366124363499997e-10, 806673.86799851817, 6.7079331637514162e+201}; +#else // not MHD + std::vector fiducial_energy{3.3366124363499997e-10, 1317.7878679524658, 1.0389584427972784e+101}; +#endif // MHD - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Es = hydro_utilities::Calc_Energy_Primitive(parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), parameters.vz.at(i), parameters.gamma); + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Conserved( + parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Es.at(i), test_Es, parameters.names.at(i)); - } + testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } } -TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) { - TestParams parameters; - std::vector fiducial_Ps {1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99}; +TEST(tHYDROtMHDHydroUtilsCalcEnergyPrimitive, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{1.4999999999999998e-20, 2588562.2478059679, 1.9018677140549926e+300}; +#else // not MHD + std::vector fiducial_energy{0, 1783206.1676754025, 1.9018677140549926e+300}; +#endif // MHD + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Primitive( + -parameters.P.at(i), parameters.d.at(i), parameters.vx.at(i), parameters.vy.at(i), 
parameters.vz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); + + testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } +} - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), parameters.U_advected.at(i), parameters.gamma); +TEST(tHYDROtMHDHydroUtilsCalcEnergyConserved, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; +#ifdef MHD + std::vector fiducial_energy{0, 805372.27373320318, 6.7079331637514162e+201}; +#else // not MHD + std::vector fiducial_energy{0, 16.193602637465997, 3.0042157852278494e+99}; +#endif // MHD + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Es = hydro_utilities::Calc_Energy_Conserved( + -parameters.P.at(i), parameters.d.at(i), parameters.mx.at(i), parameters.my.at(i), parameters.mz.at(i), + parameters.gamma, parameters.magnetic_x.at(i), parameters.magnetic_y.at(i), parameters.magnetic_z.at(i)); - testingUtilities::checkResults(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); - } + testing_utilities::Check_Results(fiducial_energy.at(i), test_Es, parameters.names.at(i)); + } } + +TEST(tHYDROHydroUtilsGetPressureFromDE, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducial_Ps{1.5927160260000002e-10, 71.713126573333341, 7.2549358980000001e+99}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real test_Ps = hydro_utilities::Get_Pressure_From_DE(parameters.E.at(i), parameters.U_total.at(i), + parameters.U_advected.at(i), parameters.gamma); + + testing_utilities::Check_Results(fiducial_Ps.at(i), test_Ps, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDCalcKineticEnergyFromVelocity, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialEnergies{0.0, 6.307524975350106e-145, 1.9018677140549924e+150}; + double const coef = 1E-50; + 
+ for (size_t i = 0; i < parameters.names.size(); i++) { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Velocity( + coef * parameters.d.at(i), coef * parameters.vx.at(i), coef * parameters.vy.at(i), coef * parameters.vz.at(i)); + + testing_utilities::Check_Results(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); + } +} + +TEST(tHYDROtMHDCalcKineticEnergyFromMomentum, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialEnergies{0.0, 0.0, 3.0042157852278499e+49}; + double const coef = 1E-50; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testEnergy = hydro_utilities::Calc_Kinetic_Energy_From_Momentum( + coef * parameters.d.at(i), coef * parameters.mx.at(i), coef * parameters.my.at(i), coef * parameters.mz.at(i)); + + testing_utilities::Check_Results(fiducialEnergies.at(i), testEnergy, parameters.names.at(i)); + } +} \ No newline at end of file diff --git a/src/utils/math_utilities.h b/src/utils/math_utilities.h new file mode 100644 index 000000000..68d13f19d --- /dev/null +++ b/src/utils/math_utilities.h @@ -0,0 +1,101 @@ +/*! + * \file math_utilities.h + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains various functions for common mathematical operations + * + */ + +#pragma once + +// STL Includes +#include +#include + +// External Includes + +// Local Includes +#include "../global/global.h" +#include "../global/global_cuda.h" +#include "../utils/gpu.hpp" + +namespace math_utils +{ +// ========================================================================= +/*! + * \brief Rotate cartesian coordinates. All arguments are cast to double + * then rotated. 
If the type is 'int' then the value is rounded to the + * nearest int + * + * \details Rotation such that when pitch=90 and yaw=0 x1_rot = -x3 and when + * pitch=0 and yaw=90 x1_rot = -x2 + * + * \tparam T The return type + * \param[in] x_1 x1 coordinate + * \param[in] x_2 x2 coordinate + * \param[in] x_3 x3 coordinate + * \param[in] pitch Pitch angle in radians + * \param[in] yaw Yaw angle in radians + * \return std::tuple The new, rotated, coordinates in the + * order . Intended to be captured with structured binding + */ +template +inline std::tuple rotateCoords(Real const &x_1, Real const &x_2, Real const &x_3, Real const &pitch, + Real const &yaw) +{ + // Compute the sines and cosines. Correct for floating point errors if + // the angle is 0.5*M_PI + Real const sin_yaw = std::sin(yaw); + Real const cos_yaw = (yaw == 0.5 * M_PI) ? 0 : std::cos(yaw); + Real const sin_pitch = std::sin(pitch); + Real const cos_pitch = (pitch == 0.5 * M_PI) ? 0 : std::cos(pitch); + + // Perform the rotation + Real const x_1_rot = (x_1 * cos_pitch * cos_yaw) + (x_2 * sin_yaw) + (x_3 * sin_pitch * cos_yaw); + Real const x_2_rot = (x_1 * cos_pitch * sin_yaw) + (x_2 * cos_yaw) + (x_3 * sin_pitch * sin_yaw); + Real const x_3_rot = (x_1 * sin_pitch) + (x_3 * cos_pitch); + + if (std::is_same::value) { + return {round(x_1_rot), round(x_2_rot), round(x_3_rot)}; + } else if (std::is_same::value) { + return {x_1_rot, x_2_rot, x_3_rot}; + } +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Compute the dot product of a and b. 
+ * + * \param[in] a1 The first element of a + * \param[in] a2 The second element of a + * \param[in] a3 The third element of a + * \param[in] b1 The first element of b + * \param[in] b2 The second element of b + * \param[in] b3 The third element of b + * + * \return Real The dot product of a and b + */ +inline __device__ __host__ Real dotProduct(Real const &a1, Real const &a2, Real const &a3, Real const &b1, + Real const &b2, Real const &b3) +{ + return a1 * b1 + ((a2 * b2) + (a3 * b3)); +}; +// ========================================================================= + +// ========================================================================= +/*! + * \brief Compute the square of the magnitude of a vector + * + * \param[in] v1 The first element of the vector + * \param[in] v2 The second element of the vector + * \param[in] v3 The third element of the vector + * + * \return Real The square of the magnitude of the vector + */ +inline __device__ __host__ Real SquareMagnitude(Real const &v1, Real const &v2, Real const &v3) +{ + return dotProduct(v1, v2, v3, v1, v2, v3); +}; +// ========================================================================= + +} // namespace math_utils diff --git a/src/utils/math_utilities_tests.cpp b/src/utils/math_utilities_tests.cpp new file mode 100644 index 000000000..a49cd8a41 --- /dev/null +++ b/src/utils/math_utilities_tests.cpp @@ -0,0 +1,77 @@ +/*! 
+ * \file math_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the contents of math_utilities.h + * + */ + +// STL Includes +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../utils/math_utilities.h" +#include "../utils/testing_utilities.h" + +// ============================================================================= +TEST(tALLRotateCoords, CorrectInputExpectCorrectOutput) +{ + // Fiducial values + double const x_1 = 19.2497333410; + double const x_2 = 60.5197699003; + double const x_3 = 86.0613942621; + double const pitch = 1.239 * M_PI; + double const yaw = 0.171 * M_PI; + double const x_1_rot_fid = -31.565679455456568; + double const x_2_rot_fid = 14.745363873361605; + double const x_3_rot_fid = -76.05402749550727; + + auto [x_1_rot, x_2_rot, x_3_rot] = math_utils::rotateCoords(x_1, x_2, x_3, pitch, yaw); + + testing_utilities::Check_Results<0>(x_1_rot_fid, x_1_rot, "x_1 rotated values"); + testing_utilities::Check_Results<0>(x_2_rot_fid, x_2_rot, "x_2 rotated values"); + testing_utilities::Check_Results<0>(x_3_rot_fid, x_3_rot, "x_3 rotated values"); +} +// ============================================================================= + +// ========================================================================= +/*! 
+ * \brief Test the math_utils::dotProduct function + * + */ +TEST(tALLDotProduct, CorrectInputExpectCorrectOutput) +{ + std::vector a{21.503067766457753, 48.316634031589935, 81.12177317622657}, + b{38.504606872151484, 18.984145880030045, 89.52561861038686}; + + double const fiducialDotProduct = 9007.6941261535867; + + double testDotProduct; + + testDotProduct = math_utils::dotProduct(a.at(0), a.at(1), a.at(2), b.at(0), b.at(1), b.at(2)); + + // Now check results + testing_utilities::Check_Results(fiducialDotProduct, testDotProduct, "dot product"); +} +// ========================================================================= + +// ========================================================================= +/*! + * \brief Test the math_utils::SquareMagnitude function + * + */ +TEST(tALLSquareMagnitude, CorrectInputExpectCorrectOutput) +{ + std::vector a = {11.503067766457753, 98.316634031589935, 41.12177317622657}; + + double const fiducial_square_magnitude = 11489.481324498336; + + double test_square_magnitude = math_utils::SquareMagnitude(a.at(0), a.at(1), a.at(2)); + + // Now check results + testing_utilities::Check_Results(fiducial_square_magnitude, test_square_magnitude, "dot product"); +} +// ========================================================================= \ No newline at end of file diff --git a/src/utils/mhd_utilities.cpp b/src/utils/mhd_utilities.cpp deleted file mode 100644 index c7747830e..000000000 --- a/src/utils/mhd_utilities.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/*! 
- * \file mhd_utilities.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Contains the implementation of various utility functions for MHD - * - */ - -// STL Includes - -// External Includes - -// Local Includes -#include "../utils/mhd_utilities.h" - -namespace mhdUtils -{ - -} // end namespace mhdUtils \ No newline at end of file diff --git a/src/utils/mhd_utilities.cu b/src/utils/mhd_utilities.cu new file mode 100644 index 000000000..bceb4abc1 --- /dev/null +++ b/src/utils/mhd_utilities.cu @@ -0,0 +1,46 @@ +/*! + * \file mhd_utilities.cu + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Contains the implementation of various utility functions for MHD and + * for the various kernels, functions, and tools required for the 3D VL+CT MHD + * integrator. Due to the CUDA/HIP compiler requiring that device functions be + * directly accessible to the file they're used in, most device functions will be + * implemented in the header file + * + */ + +// STL Includes + +// External Includes + +// Local Includes +#include "../utils/mhd_utilities.h" + +namespace mhd::utils +{ +#ifdef MHD +void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserved const &C, + std::vector const &vectorPotential) +{ + // Compute the magnetic field + for (size_t k = 1; k < H.nz; k++) { + for (size_t j = 1; j < H.ny; j++) { + for (size_t i = 1; i < H.nx; i++) { + // Get cell index. 
The "xmo" means: X direction Minus One + size_t const id = cuda_utilities::compute1DIndex(i, j, k, H.nx, H.ny); + size_t const idxmo = cuda_utilities::compute1DIndex(i - 1, j, k, H.nx, H.ny); + size_t const idymo = cuda_utilities::compute1DIndex(i, j - 1, k, H.nx, H.ny); + size_t const idzmo = cuda_utilities::compute1DIndex(i, j, k - 1, H.nx, H.ny); + + C.magnetic_x[id] = (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idymo + 2 * H.n_cells)) / H.dy - + (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idzmo + 1 * H.n_cells)) / H.dz; + C.magnetic_y[id] = (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idzmo + 0 * H.n_cells)) / H.dz - + (vectorPotential.at(id + 2 * H.n_cells) - vectorPotential.at(idxmo + 2 * H.n_cells)) / H.dx; + C.magnetic_z[id] = (vectorPotential.at(id + 1 * H.n_cells) - vectorPotential.at(idxmo + 1 * H.n_cells)) / H.dx - + (vectorPotential.at(id + 0 * H.n_cells) - vectorPotential.at(idymo + 0 * H.n_cells)) / H.dy; + } + } + } +} +#endif // MHD +} // end namespace mhd::utils \ No newline at end of file diff --git a/src/utils/mhd_utilities.h b/src/utils/mhd_utilities.h index f28cbb400..f409fd4b0 100644 --- a/src/utils/mhd_utilities.h +++ b/src/utils/mhd_utilities.h @@ -8,287 +8,244 @@ #pragma once // STL Includes +#include // External Includes // Local Includes #include "../global/global.h" #include "../global/global_cuda.h" +#include "../grid/grid3D.h" +#include "../utils/cuda_utilities.h" #include "../utils/gpu.hpp" +#include "../utils/math_utilities.h" +namespace mhd::utils +{ /*! - * \brief Namespace for MHD utilities + * \brief Namespace for functions required by functions within the mhd::utils + * namespace. Everything in this name space should be regarded as private + * but is made accesible for testing * */ -namespace mhdUtils +namespace internal { - namespace // Anonymouse namespace - { - // ===================================================================== - /*! 
- * \brief Compute the fast or slow magnetosonic wave speeds - * - * \param density The density - * \param gasPressure The gas pressure - * \param magneticX Magnetic field in the x-direction - * \param magneticY Magnetic field in the y-direction - * \param magneticZ Magnetic field in the z-direction - * \param gamma The adiabatic index - * \param waveChoice Which speed to compute. If +1 then compute the - * speed of the fast magnetosonic wave, if -1 then the speed of the slow - * magnetosonic wave - * \return Real The speed of the fast or slow magnetosonic wave - */ - inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, - Real const &gasPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma, - Real const &waveChoice) - { - // Compute the sound speed - Real bXSquared = magneticX * magneticX; - Real bSquared = bXSquared + ((magneticY*magneticY) + (magneticZ*magneticZ)); - - Real term1 = gamma * gasPressure + bSquared; - - Real term2 = (term1*term1) - 4. * gamma * gasPressure * bXSquared; - term2 = sqrt(term2); - - return sqrt( (term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER)) ); - } - // ===================================================================== - }// Anonymouse namespace +// ===================================================================== +/*! + * \brief Compute the fast or slow magnetosonic wave speeds + * + * \param density The density + * \param gasPressure The gas pressure + * \param magneticX Magnetic field in the x-direction + * \param magneticY Magnetic field in the y-direction + * \param magneticZ Magnetic field in the z-direction + * \param gamma The adiabatic index + * \param waveChoice Which speed to compute. 
If +1 then compute the + * speed of the fast magnetosonic wave, if -1 then the speed of the slow + * magnetosonic wave + * \return Real The speed of the fast or slow magnetosonic wave + */ +inline __host__ __device__ Real _magnetosonicSpeed(Real const &density, Real const &gasPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma, + Real const &waveChoice) +{ + // Compute the sound speed + Real bXSquared = magneticX * magneticX; + Real bSquared = bXSquared + ((magneticY * magneticY) + (magneticZ * magneticZ)); - // ========================================================================= - /*! - * \brief Compute the MHD energy in the cell - * - * \param[in] pressure The gas pressure - * \param[in] density The density - * \param[in] velocityX Velocity in the x-direction - * \param[in] velocityY Velocity in the y-direction - * \param[in] velocityZ Velocity in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The energy within a cell - */ - inline __host__ __device__ Real computeEnergy(Real const &pressure, - Real const &density, - Real const &velocityX, - Real const &velocityY, - Real const &velocityZ, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - // Compute and return energy - return (fmax(pressure,TINY_NUMBER)/(gamma - 1.)) - + 0.5 * density * (velocityX*velocityX + ((velocityY*velocityY) + (velocityZ*velocityZ))) - + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); - } - // ========================================================================= + Real term1 = gamma * gasPressure + bSquared; - // ========================================================================= - /*! 
- * \brief Compute the MHD gas pressure in a cell - * - * \param[in] energy The energy - * \param[in] density The density - * \param[in] momentumX Momentum in the x-direction - * \param[in] momentumY Momentum in the y-direction - * \param[in] momentumZ Momentum in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The gas pressure in a cell - */ - inline __host__ __device__ Real computeGasPressure(Real const &energy, - Real const &density, - Real const &momentumX, - Real const &momentumY, - Real const &momentumZ, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - Real pressure = (gamma - 1.) - * (energy - - 0.5 * (momentumX*momentumX + ((momentumY*momentumY) + (momentumZ*momentumZ))) / density - - 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ)))); + Real term2 = (term1 * term1) - 4. * gamma * gasPressure * bXSquared; + term2 = sqrt(term2); - return fmax(pressure, TINY_NUMBER); - } - // ========================================================================= + return sqrt((term1 + waveChoice * term2) / (2.0 * fmax(density, TINY_NUMBER))); +} +// ===================================================================== +} // namespace internal - // ========================================================================= - /*! 
- * \brief Compute the MHD thermal energy in a cell - * - * \param[in] energyTot The total energy - * \param[in] density The density - * \param[in] momentumX Momentum in the x-direction - * \param[in] momentumY Momentum in the y-direction - * \param[in] momentumZ Momentum in the z-direction - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \param[in] gamma The adiabatic index - * \return Real The thermal energy in a cell - */ - inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, - Real const &density, - Real const &momentumX, - Real const &momentumY, - Real const &momentumZ, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - return energyTot - 0.5 * (momentumX*momentumX + ((momentumY*momentumY) + (momentumZ*momentumZ))) / fmax(density,TINY_NUMBER) - - 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the magnetic energy + * + * \param[in] magneticX The magnetic field in the X-direction + * \param[in] magneticY The magnetic field in the Y-direction + * \param[in] magneticZ The magnetic field in the Z-direction + * \return Real The magnetic energy + */ +inline __host__ __device__ Real computeMagneticEnergy(Real const &magneticX, Real const &magneticY, + Real const &magneticZ) +{ + return 0.5 * math_utils::SquareMagnitude(magneticX, magneticY, magneticZ); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the total MHD pressure. I.e. 
magnetic pressure + gas - * pressure - * - * \param[in] gasPressure The gas pressure - * \param[in] magneticX Magnetic field in the x-direction - * \param[in] magneticY Magnetic field in the y-direction - * \param[in] magneticZ Magnetic field in the z-direction - * \return Real The total MHD pressure - */ - inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ) - { - Real pTot = gasPressure + 0.5 * (magneticX*magneticX + ((magneticY*magneticY) + (magneticZ*magneticZ))); +// ========================================================================= +/*! + * \brief Compute the MHD thermal energy in a cell + * + * \param[in] energyTot The total energy + * \param[in] density The density + * \param[in] momentumX Momentum in the x-direction + * \param[in] momentumY Momentum in the y-direction + * \param[in] momentumZ Momentum in the z-direction + * \param[in] magneticX Magnetic field in the x-direction + * \param[in] magneticY Magnetic field in the y-direction + * \param[in] magneticZ Magnetic field in the z-direction + * \param[in] gamma The adiabatic index + * \return Real The thermal energy in a cell + */ +inline __host__ __device__ Real computeThermalEnergy(Real const &energyTot, Real const &density, Real const &momentumX, + Real const &momentumY, Real const &momentumZ, + Real const &magneticX, Real const &magneticY, + Real const &magneticZ, Real const &gamma) +{ + return energyTot - 0.5 * math_utils::SquareMagnitude(momentumX, momentumY, momentumZ) / fmax(density, TINY_NUMBER) - + computeMagneticEnergy(magneticX, magneticY, magneticZ); +} +// ========================================================================= - return fmax(pTot, TINY_NUMBER); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the total MHD pressure. I.e. 
magnetic pressure + gas + * pressure + * + * \param[in] gasPressure The gas pressure + * \param[in] magneticX Magnetic field in the x-direction + * \param[in] magneticY Magnetic field in the y-direction + * \param[in] magneticZ Magnetic field in the z-direction + * \return Real The total MHD pressure + */ +inline __host__ __device__ Real computeTotalPressure(Real const &gasPressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ) +{ + Real pTot = gasPressure + computeMagneticEnergy(magneticX, magneticY, magneticZ); - // ========================================================================= - /*! - * \brief Compute the speed of the fast magnetosonic wave - * - * \param density The gas pressure - * \param pressure The density - * \param magneticX Magnetic field in the x-direction - * \param magneticY Magnetic field in the y-direction - * \param magneticZ Magnetic field in the z-direction - * \param gamma The adiabatic index - * \return Real The speed of the fast magnetosonic wave - */ - inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density, - Real const &pressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - // Compute the sound speed - return _magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - 1.0); - } - // ========================================================================= + return fmax(pTot, TINY_NUMBER); +} +// ========================================================================= - // ========================================================================= - /*! 
- * \brief Compute the speed of the slow magnetosonic wave - * - * \param density The gas pressure - * \param pressure The density - * \param magneticX Magnetic field in the x-direction - * \param magneticY Magnetic field in the y-direction - * \param magneticZ Magnetic field in the z-direction - * \param gamma The adiabatic index - * \return Real The speed of the slow magnetosonic wave - */ - inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density, - Real const &pressure, - Real const &magneticX, - Real const &magneticY, - Real const &magneticZ, - Real const &gamma) - { - // Compute the sound speed - return _magnetosonicSpeed(density, - pressure, - magneticX, - magneticY, - magneticZ, - gamma, - -1.0); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the speed of the fast magnetosonic wave + * + * \param density The density + * \param pressure The gas pressure + * \param magneticX Magnetic field in the x-direction + * \param magneticY Magnetic field in the y-direction + * \param magneticZ Magnetic field in the z-direction + * \param gamma The adiabatic index + * \return Real The speed of the fast magnetosonic wave + */ +inline __host__ __device__ Real fastMagnetosonicSpeed(Real const &density, Real const &pressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + // Compute the fast magnetosonic wave speed + return mhd::utils::internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, 1.0); +} +// ========================================================================= - // ========================================================================= - /*! 
- * \brief Compute the speed of the Alfven wave in a cell - * - * \param[in] magneticX The magnetic field in the x direction, ie the direction - * along with the Riemann solver is acting - * \param[in] density The density in the cell - * \return Real The Alfven wave speed - */ - inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, - Real const &density) - { - // Compute the Alfven wave speed - return fabs(magneticX) / sqrt(fmax(density,TINY_NUMBER)); - } - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the speed of the slow magnetosonic wave + * + * \param density The density + * \param pressure The gas pressure + * \param magneticX Magnetic field in the x-direction + * \param magneticY Magnetic field in the y-direction + * \param magneticZ Magnetic field in the z-direction + * \param gamma The adiabatic index + * \return Real The speed of the slow magnetosonic wave + */ +inline __host__ __device__ Real slowMagnetosonicSpeed(Real const &density, Real const &pressure, Real const &magneticX, + Real const &magneticY, Real const &magneticZ, Real const &gamma) +{ + // Compute the slow magnetosonic wave speed + return mhd::utils::internal::_magnetosonicSpeed(density, pressure, magneticX, magneticY, magneticZ, gamma, -1.0); +} +// ========================================================================= - // ========================================================================= - /*! - * \brief Compute the cell centered average of the magnetic fields in a - * given cell - * - * \param[in] dev_conserved A pointer to the device array of conserved variables - * \param[in] id The 1D index into each grid subarray. 
- * \param[in] xid The x index - * \param[in] yid The y index - * \param[in] zid The z index - * \param[in] n_cells The total number of cells - * \param[in] nx The number of cells in the x-direction - * \param[in] ny The number of cells in the y-direction - * \param[out] avgBx The cell centered average magnetic field in the x-direction - * \param[out] avgBy The cell centered average magnetic field in the y-direction - * \param[out] avgBz The cell centered average magnetic field in the z-direction - */ - inline __host__ __device__ void cellCenteredMagneticFields(Real const *dev_conserved, - size_t const &id, - size_t const &xid, - size_t const &yid, - size_t const &zid, - size_t const &n_cells, - size_t const &nx, - size_t const &ny, - Real &avgBx, - Real &avgBy, - Real &avgBz) - { - avgBx = 0.5 * (dev_conserved[(5+NSCALARS)*n_cells + id] + dev_conserved[(5+NSCALARS)*n_cells + ((xid-1) + yid*nx + zid*nx*ny)]); - avgBy = 0.5 * (dev_conserved[(6+NSCALARS)*n_cells + id] + dev_conserved[(6+NSCALARS)*n_cells + (xid + (yid-1)*nx + zid*nx*ny)]); - avgBz = 0.5 * (dev_conserved[(7+NSCALARS)*n_cells + id] + dev_conserved[(7+NSCALARS)*n_cells + (xid + yid*nx + (zid-1)*nx*ny)]); - } - // ========================================================================= +// ========================================================================= +/*! 
+ * \brief Compute the speed of the Alfven wave in a cell + * + * \param[in] magneticX The magnetic field in the x direction, ie the direction + * along with the Riemann solver is acting + * \param[in] density The density in the cell + * \return Real The Alfven wave speed + */ +inline __host__ __device__ Real alfvenSpeed(Real const &magneticX, Real const &density) +{ + // Compute the Alfven wave speed + return fabs(magneticX) / sqrt(fmax(density, TINY_NUMBER)); +} +// ========================================================================= -} // end namespace mhdUtils \ No newline at end of file +// ========================================================================= +#ifdef MHD +/*! + * \brief Compute the cell centered average of the magnetic fields in a + * given cell + * + * \param[in] dev_conserved A pointer to the device array of conserved variables + * \param[in] id The 1D index into each grid subarray. + * \param[in] xid The x index + * \param[in] yid The y index + * \param[in] zid The z index + * \param[in] n_cells The total number of cells + * \param[in] nx The number of cells in the x-direction + * \param[in] ny The number of cells in the y-direction + * \param[out] avgBx The cell centered average magnetic field in the x-direction + * \param[out] avgBy The cell centered average magnetic field in the y-direction + * \param[out] avgBz The cell centered average magnetic field in the z-direction + * + * \return Real local struct with the X, Y, and Z cell centered magnetic + * fields. Intended to be called with structured binding like `auto [x, y, + * z] = mhd::utils::cellCenteredMagneticFields(*args*) + */ +inline __host__ __device__ auto cellCenteredMagneticFields(Real const *dev_conserved, size_t const &id, + size_t const &xid, size_t const &yid, size_t const &zid, + size_t const &n_cells, size_t const &nx, size_t const &ny) +{ + // Ternary operator to check that no values outside of the magnetic field + // arrays are loaded. 
If the cell is on the edge that doesn't have magnetic + // fields on both sides then instead set the centered magnetic field to be + // equal to the magnetic field of the closest edge. + Real avgBx = (xid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_x)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_x)*n_cells + + cuda_utilities::compute1DIndex(xid - 1, yid, zid, nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_x)*n_cells + id]; + Real avgBy = (yid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_y)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_y)*n_cells + + cuda_utilities::compute1DIndex(xid, yid - 1, zid, nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_y)*n_cells + id]; + Real avgBz = (zid > 0) ? + /*if true*/ 0.5 * (dev_conserved[(grid_enum::magnetic_z)*n_cells + id] + + dev_conserved[(grid_enum::magnetic_z)*n_cells + + cuda_utilities::compute1DIndex(xid, yid, zid - 1, nx, ny)]) + : + /*if false*/ dev_conserved[(grid_enum::magnetic_z)*n_cells + id]; + + struct ReturnStruct { + Real x, y, z; + }; + return ReturnStruct{avgBx, avgBy, avgBz}; +} +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Initialize the magnetic field from the vector potential + * + * \param H The Header struct + * \param C The Conserved struct + * \param vectorPotential The vector potential in the same format as the other arrays in Cholla + */ +void Init_Magnetic_Field_With_Vector_Potential(Header const &H, Grid3D::Conserved const &C, + std::vector const &vectorPotential); +// ========================================================================= +#endif // MHD +} // end namespace mhd::utils diff --git a/src/utils/mhd_utilities_tests.cpp b/src/utils/mhd_utilities_tests.cpp deleted file mode 100644 index c5cbb25fb..000000000 --- a/src/utils/mhd_utilities_tests.cpp +++ /dev/null @@ -1,509 +0,0 @@ -/*! - * \file mhd_utilities_tests.cpp - * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Tests for the contents of mhd_utilities.h and mhd_utilities.cpp - * - */ - -// STL Includes -#include -#include -#include -#include -#include - -// External Includes -#include // Include GoogleTest and related libraries/headers - -// Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/mhd_utilities.h" -#include "../global/global.h" - -// ============================================================================= -// Local helper functions -namespace -{ - struct testParams - { - double gamma = 5./3.; - std::vector density {8.4087201154e-100, 1.6756968986e2, 5.4882403847e100}; - std::vector velocityX {7.0378624601e-100, 7.0829278656e2, 1.8800514112e100}; - std::vector velocityY {7.3583469014e-100, 5.9283073464e2, 5.2725717864e100}; - std::vector velocityZ {1.7182972216e-100, 8.8417748226e2, 1.5855352639e100}; - std::vector momentumX {8.2340416681e-100, 8.1019429453e2, 5.5062596954e100}; - std::vector momentumY {4.9924582299e-100, 7.1254780684e2, 6.5939640992e100}; - std::vector momentumZ {3.6703192739e-100, 7.5676716066e2, 7.2115881803e100}; - std::vector energy {3.0342082433e-100, 7.6976906577e2, 1.9487120853e100}; - std::vector pressureGas 
{2.2244082909e-100, 8.6772951021e2, 6.7261085663e100}; - std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; - std::vector magneticX {2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; - std::vector magneticY {9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; - std::vector magneticZ {9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; - std::vector names{"Small number case", "Medium number case", "Large number case"}; - }; -} -// ============================================================================= - - -// ============================================================================= -// Tests for the mhdUtils::computeEnergy function -// ============================================================================= -/*! - * \brief Test the mhdUtils::computeEnergy function with the standard set of - * parameters - * - */ -TEST(tMHDComputeEnergy, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, - 137786230.15630624, - 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testEnergy = mhdUtils::computeEnergy(parameters.pressureGas.at(i), - parameters.density.at(i), - parameters.velocityX.at(i), - parameters.velocityY.at(i), - parameters.velocityZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), - testEnergy, - parameters.names.at(i)); - } -} - -/*! 
- * \brief Test the mhdUtils::computeEnergy function with a the standard set of - * parameters except pressure is now negative - * - */ -TEST(tMHDComputeEnergy, - NegativePressureExpectAutomaticFix) -{ - testParams parameters; - std::vector fiducialEnergies{3.3366124363499995e-100, - 137784928.56204093, - 9.2884430880010847e+301}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testEnergy = mhdUtils::computeEnergy(-parameters.pressureGas.at(i), - parameters.density.at(i), - parameters.velocityX.at(i), - parameters.velocityY.at(i), - parameters.velocityZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialEnergies.at(i), - testEnergy, - parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhdUtils::computeEnergy function -// ============================================================================= - -// ============================================================================= -// Tests for the mhdUtils::computeGasPressure function -// ============================================================================= -/*! - * \brief Test the mhdUtils::computeGasPressure function with the standard set of - * parameters. 
Energy has been increased to avoid negative pressures - * - */ -TEST(tMHDComputeGasPressure, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector energyMultiplier{3, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{1.8586864490415075e-100, - 4591434.7663756227, - 1.29869419465575e+205}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testGasPressure = mhdUtils::computeGasPressure(energyMultiplier.at(i) * parameters.energy.at(i), - parameters.density.at(i), - parameters.momentumX.at(i), - parameters.momentumY.at(i), - parameters.momentumZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialGasPressures.at(i), - testGasPressure, - parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhdUtils::computeGasPressure function with a the standard set - * of parameters which produce negative pressures - * - */ -TEST(tMHDComputeGasPressure, - NegativePressureExpectAutomaticFix) -{ - testParams parameters; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testGasPressure = mhdUtils::computeGasPressure(parameters.energy.at(i), - parameters.density.at(i), - parameters.momentumX.at(i), - parameters.momentumY.at(i), - parameters.momentumZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - // I'm using the binary equality assertion here since in the case of - // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testGasPressure) - << "Difference in " << parameters.names.at(i) << std::endl; - } -} -// ============================================================================= -// End of tests for the mhdUtils::computeGasPressure function -// ============================================================================= - - -// 
============================================================================= -// Tests for the mhdUtils::computeThermalEnergy function -// ============================================================================= -/*! - * \brief Test the mhdUtils::computeThermalEnergy function with the standard set - * of parameters. - * - */ -TEST(tMHDComputeThermalEnergy, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; - std::vector fiducialGasPressures{3.0342082433e-15, - 6887152.1495634327, - 1.9480412919836246e+205}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testGasPressure = mhdUtils::computeThermalEnergy(energyMultiplier.at(i) * parameters.energy.at(i), - parameters.density.at(i), - parameters.momentumX.at(i), - parameters.momentumY.at(i), - parameters.momentumZ.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialGasPressures.at(i), - testGasPressure, - parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhdUtils::computeThermalEnergyfunction -// ============================================================================= - -// ============================================================================= -// Tests for the mhdUtils::computeTotalPressure function -// ============================================================================= -/*! - * \brief Test the mhdUtils::computeTotalPressure function with the standard set - * of parameters. 
- * - */ -TEST(tMHDComputeTotalPressure, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialTotalPressures{9.9999999999999995e-21, - 806223.80964077567, - 6.7079331637514151e+201}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testTotalPressure = mhdUtils::computeTotalPressure(parameters.pressureGas.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i)); - - testingUtilities::checkResults(fiducialTotalPressures.at(i), - testTotalPressure, - parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhdUtils::computeTotalPressure function with a the standard - * set of parameters. Gas pressure has been multiplied and made negative to - * generate negative total pressures - * - */ -TEST(tMHDComputeTotalPressure, - NegativePressureExpectAutomaticFix) -{ - testParams parameters; - std::vector pressureMultiplier{1.0, -1.0e4, -1.0e105}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testTotalPressure = mhdUtils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), - parameters.magneticX.at(i), - parameters.magneticY.at(i), - parameters.magneticZ.at(i)); - - // I'm using the binary equality assertion here since in the case of - // negative pressure the function should return exactly TINY_NUMBER - EXPECT_EQ(TINY_NUMBER, testTotalPressure) - << "Difference in " << parameters.names.at(i) << std::endl; - } -} -// ============================================================================= -// End of tests for the mhdUtils::computeTotalPressure function -// ============================================================================= - -// ============================================================================= -// Tests for the mhdUtils::fastMagnetosonicSpeed function -// ============================================================================= -/*! 
- * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard - * set of parameters. All values are reduced by 1e-25 in the large number case - * to avoid overflow - * - */ -TEST(tMHDFastMagnetosonicSpeed, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, - 98.062482309387562, - 1.5634816865472293e+38}; - std::vector coef{1.0, 1.0, 1.0e-25}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed( - coef.at(i)*parameters.density.at(i), - coef.at(i)*parameters.pressureGas.at(i), - coef.at(i)*parameters.magneticX.at(i), - coef.at(i)*parameters.magneticY.at(i), - coef.at(i)*parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), - testFastMagnetosonicSpeed, - parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhdUtils::fastMagnetosonicSpeed function with the standard - * set of parameters, density is negative. All values are reduced by 1e-25 in - * the large number case to avoid overflow. 
- * - */ -TEST(tMHDFastMagnetosonicSpeed, - NegativeDensityExpectAutomaticFix) -{ - testParams parameters; - std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, - 12694062010603.15, - 1.1582688085027081e+86}; - std::vector coef{1.0, 1.0, 1.0e-25}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testFastMagnetosonicSpeed = mhdUtils::fastMagnetosonicSpeed( - -coef.at(i)*parameters.density.at(i), - coef.at(i)*parameters.pressureGas.at(i), - coef.at(i)*parameters.magneticX.at(i), - coef.at(i)*parameters.magneticY.at(i), - coef.at(i)*parameters.magneticZ.at(i), - parameters.gamma); - - testingUtilities::checkResults(fiducialFastMagnetosonicSpeed.at(i), - testFastMagnetosonicSpeed, - parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhdUtils::fastMagnetosonicSpeed function -// ============================================================================= - -// ============================================================================= -// Tests for the mhdUtils::slowMagnetosonicSpeed function -// ============================================================================= -/*! - * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard - * set of parameters. 
All values are reduced by 1e-25 in the large number case - * to avoid overflow - * - */ -TEST(tMHDSlowMagnetosonicSpeed, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialSlowMagnetosonicSpeed{0.0, - 2.138424778167535, - 0.26678309355540852}; - // Coefficient to make sure the output is well defined and not nan or inf - double const coef = 1E-95; - - for (size_t i = 2; i < parameters.names.size(); i++) - { - Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( - parameters.density.at(i) * coef, - parameters.pressureGas.at(i) * coef, - parameters.magneticX.at(i) * coef, - parameters.magneticY.at(i) * coef, - parameters.magneticZ.at(i) * coef, - parameters.gamma); - - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), - testSlowMagnetosonicSpeed, - parameters.names.at(i)); - } -} - -/*! - * \brief Test the mhdUtils::slowMagnetosonicSpeed function with the standard - * set of parameters, density is negative. All values are reduced by 1e-25 in - * the large number case to avoid overflow. 
- * - */ -TEST(tMHDSlowMagnetosonicSpeed, - NegativeDensityExpectAutomaticFix) -{ - testParams parameters; - std::vector fiducialSlowMagnetosonicSpeed{0.0, - 276816332809.37604, - 1976400098318.3574}; - // Coefficient to make sure the output is well defined and not nan or inf - double const coef = 1E-95; - - for (size_t i = 2; i < parameters.names.size(); i++) - { - Real testSlowMagnetosonicSpeed = mhdUtils::slowMagnetosonicSpeed( - -parameters.density.at(i) * coef, - parameters.pressureGas.at(i) * coef, - parameters.magneticX.at(i) * coef, - parameters.magneticY.at(i) * coef, - parameters.magneticZ.at(i) * coef, - parameters.gamma); - - testingUtilities::checkResults(fiducialSlowMagnetosonicSpeed.at(i), - testSlowMagnetosonicSpeed, - parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhdUtils::slowMagnetosonicSpeed function -// ============================================================================= - -// ============================================================================= -// Tests for the mhdUtils::alfvenSpeed function -// ============================================================================= -/*! - * \brief Test the mhdUtils::alfvenSpeed function with the standard set of - * parameters. - * - */ -TEST(tMHDAlfvenSpeed, - CorrectInputExpectCorrectOutput) -{ - testParams parameters; - std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, - 71.380245120271113, - 9.2291462785524423e+49}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i), - parameters.density.at(i)); - - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), - testAlfvenSpeed, - parameters.names.at(i)); - } -} - -/*! 
- * \brief Test the mhdUtils::alfvenSpeed function with the standard set of - * parameters except density is negative - * - */ -TEST(tMHDAlfvenSpeed, - NegativeDensityExpectAutomaticFix) -{ - testParams parameters; - std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, - 9240080778600, - 2.1621115263999998e+110}; - - for (size_t i = 0; i < parameters.names.size(); i++) - { - Real testAlfvenSpeed = mhdUtils::alfvenSpeed(parameters.magneticX.at(i), - -parameters.density.at(i)); - - testingUtilities::checkResults(fiducialAlfvenSpeed.at(i), - testAlfvenSpeed, - parameters.names.at(i)); - } -} -// ============================================================================= -// End of tests for the mhdUtils::alfvenSpeed function -// ============================================================================= - -// ============================================================================= -// Tests for the mhdUtils::cellCenteredMagneticFields function -// ============================================================================= -TEST(tMHDCellCenteredMagneticFields, - CorrectInputExpectCorrectOutput) -{ - // Initialize the test grid and other state variables - size_t const nx = 3, ny = nx; - size_t const xid = std::floor(nx/2), yid = xid, zid = xid; - size_t const id = xid + yid*nx + zid*nx*ny; - - size_t const n_cells = std::pow(5,3); - // Make sure the vector is large enough that the locations where the - // magnetic field would be in the real grid are filled - std::vector testGrid(n_cells * (8+NSCALARS)); - // Populate the grid with values where testGrid.at(i) = double(i). 
The - // values chosen aren't that important, just that every cell has a unique - // value - std::iota(std::begin(testGrid), std::end(testGrid), 0.); - - // Fiducial and test variables - double const fiducialAvgBx = 637.5, - fiducialAvgBy = 761.5, - fiducialAvgBz = 883.5; - double testAvgBx, testAvgBy, testAvgBz; - - // Call the function to test - mhdUtils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny, testAvgBx, testAvgBy, testAvgBz); - - // Check the results - testingUtilities::checkResults(fiducialAvgBx, testAvgBx, "cell centered Bx value"); - testingUtilities::checkResults(fiducialAvgBy, testAvgBy, "cell centered By value"); - testingUtilities::checkResults(fiducialAvgBz, testAvgBz, "cell centered Bz value"); -} -// ============================================================================= -// End of tests for the mhdUtils::cellCenteredMagneticFields function -// ============================================================================= diff --git a/src/utils/mhd_utilities_tests.cu b/src/utils/mhd_utilities_tests.cu new file mode 100644 index 000000000..be9b48c78 --- /dev/null +++ b/src/utils/mhd_utilities_tests.cu @@ -0,0 +1,385 @@ +/*! + * \file mhd_utilities_tests.cpp + * \author Robert 'Bob' Caddy (rvc@pitt.edu) + * \brief Tests for the contents of mhd_utilities.h and mhd_utilities.cpp + * + */ + +// STL Includes +#include +#include +#include +#include +#include +#include + +// External Includes +#include // Include GoogleTest and related libraries/headers + +// Local Includes +#include "../global/global.h" +#include "../grid/grid3D.h" +#include "../utils/mhd_utilities.h" +#include "../utils/testing_utilities.h" + +// ============================================================================= +// Local helper functions +namespace +{ +struct TestParams { + double gamma = 5. 
/ 3.; + std::vector density{8.4087201154e-100, 1.6756968986e2, 5.4882403847e100}; + std::vector velocityX{7.0378624601e-100, 7.0829278656e2, 1.8800514112e100}; + std::vector velocityY{7.3583469014e-100, 5.9283073464e2, 5.2725717864e100}; + std::vector velocityZ{1.7182972216e-100, 8.8417748226e2, 1.5855352639e100}; + std::vector momentumX{8.2340416681e-100, 8.1019429453e2, 5.5062596954e100}; + std::vector momentumY{4.9924582299e-100, 7.1254780684e2, 6.5939640992e100}; + std::vector momentumZ{3.6703192739e-100, 7.5676716066e2, 7.2115881803e100}; + std::vector energy{3.0342082433e-100, 7.6976906577e2, 1.9487120853e100}; + std::vector pressureGas{2.2244082909e-100, 8.6772951021e2, 6.7261085663e100}; + std::vector pressureTotal{8.1704748693e-100, 2.6084125198e2, 1.8242151369e100}; + std::vector magneticX{2.8568843801e-100, 9.2400807786e2, 2.1621115264e100}; + std::vector magneticY{9.2900880344e-100, 8.0382409757e2, 6.6499532343e100}; + std::vector magneticZ{9.5795678229e-100, 3.3284839263e2, 9.2337456649e100}; + std::vector names{"Small number case", "Medium number case", "Large number case"}; +}; +} // namespace +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::computeThermalEnergy function +// ============================================================================= +/*! + * \brief Test the mhd::utils::computeThermalEnergy function with the standard + * set of parameters. 
+ * + */ +TEST(tMHDComputeThermalEnergy, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; + std::vector fiducialGasPressures{3.0342082433e-15, 6887152.1495634327, 1.9480412919836246e+205}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testGasPressure = mhd::utils::computeThermalEnergy( + energyMultiplier.at(i) * parameters.energy.at(i), parameters.density.at(i), parameters.momentumX.at(i), + parameters.momentumY.at(i), parameters.momentumZ.at(i), parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i), parameters.gamma); + + testing_utilities::Check_Results(fiducialGasPressures.at(i), testGasPressure, parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::computeThermalEnergy function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::computeMagneticEnergy function +// ============================================================================= +/*! + * \brief Test the mhd::utils::computeMagneticEnergy function with the standard + * set of parameters. 
+ * + */ +TEST(tMHDcomputeMagneticEnergy, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector energyMultiplier{1.0E85, 1.0E4, 1.0E105}; + std::vector fiducialEnergy{0.0, 805356.08013056568, 6.7079331637514162e+201}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testMagneticEnergy = mhd::utils::computeMagneticEnergy(parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i)); + + testing_utilities::Check_Results(fiducialEnergy.at(i), testMagneticEnergy, parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::computeMagneticEnergy function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::computeTotalPressure function +// ============================================================================= +/*! + * \brief Test the mhd::utils::computeTotalPressure function with the standard + * set of parameters. + * + */ +TEST(tMHDComputeTotalPressure, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialTotalPressures{9.9999999999999995e-21, 806223.80964077567, 6.7079331637514151e+201}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testTotalPressure = mhd::utils::computeTotalPressure(parameters.pressureGas.at(i), parameters.magneticX.at(i), + parameters.magneticY.at(i), parameters.magneticZ.at(i)); + + testing_utilities::Check_Results(fiducialTotalPressures.at(i), testTotalPressure, parameters.names.at(i)); + } +} + +/*! + * \brief Test the mhd::utils::computeTotalPressure function with a the standard + * set of parameters. 
Gas pressure has been multiplied and made negative to + * generate negative total pressures + * + */ +TEST(tMHDComputeTotalPressure, NegativePressureExpectAutomaticFix) +{ + TestParams parameters; + std::vector pressureMultiplier{1.0, -1.0e4, -1.0e105}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testTotalPressure = mhd::utils::computeTotalPressure(pressureMultiplier.at(i) * parameters.pressureGas.at(i), + parameters.magneticX.at(i), parameters.magneticY.at(i), + parameters.magneticZ.at(i)); + + // I'm using the binary equality assertion here since in the case of + // negative pressure the function should return exactly TINY_NUMBER + EXPECT_EQ(TINY_NUMBER, testTotalPressure) << "Difference in " << parameters.names.at(i) << std::endl; + } +} +// ============================================================================= +// End of tests for the mhd::utils::computeTotalPressure function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::fastMagnetosonicSpeed function +// ============================================================================= +/*! + * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard + * set of parameters. 
All values are reduced by 1e-25 in the large number case + * to avoid overflow + * + */ +TEST(tMHDFastMagnetosonicSpeed, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 98.062482309387562, 1.5634816865472293e+38}; + std::vector coef{1.0, 1.0, 1.0e-25}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( + coef.at(i) * parameters.density.at(i), coef.at(i) * parameters.pressureGas.at(i), + coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), + coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); + + testing_utilities::Check_Results(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, + parameters.names.at(i)); + } +} + +/*! + * \brief Test the mhd::utils::fastMagnetosonicSpeed function with the standard + * set of parameters, density is negative. All values are reduced by 1e-25 in + * the large number case to avoid overflow. 
+ * + */ +TEST(tMHDFastMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) +{ + TestParams parameters; + std::vector fiducialFastMagnetosonicSpeed{1.9254472601190615e-40, 12694062010603.15, 1.1582688085027081e+86}; + std::vector coef{1.0, 1.0, 1.0e-25}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testFastMagnetosonicSpeed = mhd::utils::fastMagnetosonicSpeed( + -coef.at(i) * parameters.density.at(i), coef.at(i) * parameters.pressureGas.at(i), + coef.at(i) * parameters.magneticX.at(i), coef.at(i) * parameters.magneticY.at(i), + coef.at(i) * parameters.magneticZ.at(i), parameters.gamma); + + testing_utilities::Check_Results(fiducialFastMagnetosonicSpeed.at(i), testFastMagnetosonicSpeed, + parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::fastMagnetosonicSpeed function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::slowMagnetosonicSpeed function +// ============================================================================= +/*! + * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard + * set of parameters. 
All values are reduced by 1e-25 in the large number case + * to avoid overflow + * + */ +TEST(tMHDSlowMagnetosonicSpeed, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialSlowMagnetosonicSpeed{0.0, 2.138424778167535, 0.26678309355540852}; + // Coefficient to make sure the output is well defined and not nan or inf + double const coef = 1E-95; + + for (size_t i = 2; i < parameters.names.size(); i++) { + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( + parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, + parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); + + testing_utilities::Check_Results(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, + parameters.names.at(i)); + } +} + +/*! + * \brief Test the mhd::utils::slowMagnetosonicSpeed function with the standard + * set of parameters, density is negative. All values are reduced by 1e-25 in + * the large number case to avoid overflow. 
+ * + */ +TEST(tMHDSlowMagnetosonicSpeed, NegativeDensityExpectAutomaticFix) +{ + TestParams parameters; + std::vector fiducialSlowMagnetosonicSpeed{0.0, 276816332809.37604, 1976400098318.3574}; + // Coefficient to make sure the output is well defined and not nan or inf + double const coef = 1E-95; + + for (size_t i = 2; i < parameters.names.size(); i++) { + Real testSlowMagnetosonicSpeed = mhd::utils::slowMagnetosonicSpeed( + -parameters.density.at(i) * coef, parameters.pressureGas.at(i) * coef, parameters.magneticX.at(i) * coef, + parameters.magneticY.at(i) * coef, parameters.magneticZ.at(i) * coef, parameters.gamma); + + testing_utilities::Check_Results(fiducialSlowMagnetosonicSpeed.at(i), testSlowMagnetosonicSpeed, + parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::slowMagnetosonicSpeed function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::alfvenSpeed function +// ============================================================================= +/*! + * \brief Test the mhd::utils::alfvenSpeed function with the standard set of + * parameters. + * + */ +TEST(tMHDAlfvenSpeed, CorrectInputExpectCorrectOutput) +{ + TestParams parameters; + std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 71.380245120271113, 9.2291462785524423e+49}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), parameters.density.at(i)); + + testing_utilities::Check_Results(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); + } +} + +/*! 
+ * \brief Test the mhd::utils::alfvenSpeed function with the standard set of + * parameters except density is negative + * + */ +TEST(tMHDAlfvenSpeed, NegativeDensityExpectAutomaticFix) +{ + TestParams parameters; + std::vector fiducialAlfvenSpeed{2.8568843800999998e-90, 9240080778600, 2.1621115263999998e+110}; + + for (size_t i = 0; i < parameters.names.size(); i++) { + Real testAlfvenSpeed = mhd::utils::alfvenSpeed(parameters.magneticX.at(i), -parameters.density.at(i)); + + testing_utilities::Check_Results(fiducialAlfvenSpeed.at(i), testAlfvenSpeed, parameters.names.at(i)); + } +} +// ============================================================================= +// End of tests for the mhd::utils::alfvenSpeed function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::cellCenteredMagneticFields function +// ============================================================================= +#ifdef MHD +TEST(tMHDCellCenteredMagneticFields, CorrectInputExpectCorrectOutput) +{ + // Initialize the test grid and other state variables + size_t const nx = 3, ny = nx; + size_t const xid = std::floor(nx / 2), yid = xid, zid = xid; + size_t const id = xid + yid * nx + zid * nx * ny; + + size_t const n_cells = std::pow(5, 3); + // Make sure the vector is large enough that the locations where the + // magnetic field would be in the real grid are filled + std::vector testGrid(n_cells * (grid_enum::num_fields)); + // Populate the grid with values where testGrid.at(i) = double(i). 
The + // values chosen aren't that important, just that every cell has a unique + // value + std::iota(std::begin(testGrid), std::end(testGrid), 0.); + + // Fiducial and test variables + double const fiducialAvgBx = 637.5, fiducialAvgBy = 761.5, fiducialAvgBz = 883.5; + + // Call the function to test + auto [testAvgBx, testAvgBy, testAvgBz] = + mhd::utils::cellCenteredMagneticFields(testGrid.data(), id, xid, yid, zid, n_cells, nx, ny); + + // Check the results + testing_utilities::Check_Results(fiducialAvgBx, testAvgBx, "cell centered Bx value"); + testing_utilities::Check_Results(fiducialAvgBy, testAvgBy, "cell centered By value"); + testing_utilities::Check_Results(fiducialAvgBz, testAvgBz, "cell centered Bz value"); +} +#endif // MHD +// ============================================================================= +// End of tests for the mhd::utils::cellCenteredMagneticFields function +// ============================================================================= + +// ============================================================================= +// Tests for the mhd::utils::Init_Magnetic_Field_With_Vector_Potential function +// ============================================================================= +#ifdef MHD +TEST(tMHDInitMagneticFieldWithVectorPotential, CorrectInputExpectCorrectOutput) +{ + // Mock up Header and Conserved structs + Header H; + Grid3D::Conserved C; + + H.nx = 2; + H.ny = 2; + H.nz = 2; + H.n_cells = H.nx * H.ny * H.nz; + H.dx = 0.2; + H.dy = 0.2; + H.dz = 0.2; + + double const default_fiducial = -999; + std::vector conserved_vector(H.n_cells * grid_enum::num_fields, default_fiducial); + C.host = conserved_vector.data(); + C.density = &(C.host[grid_enum::density * H.n_cells]); + C.momentum_x = &(C.host[grid_enum::momentum_x * H.n_cells]); + C.momentum_y = &(C.host[grid_enum::momentum_y * H.n_cells]); + C.momentum_z = &(C.host[grid_enum::momentum_z * H.n_cells]); + C.Energy = &(C.host[grid_enum::Energy * H.n_cells]); + C.magnetic_x = 
&(C.host[grid_enum::magnetic_x * H.n_cells]); + C.magnetic_y = &(C.host[grid_enum::magnetic_y * H.n_cells]); + C.magnetic_z = &(C.host[grid_enum::magnetic_z * H.n_cells]); + + // Mock up vector potential + std::vector vector_potential(H.n_cells * 3, 0); + std::iota(vector_potential.begin(), vector_potential.end(), 0); + + // Run the function + mhd::utils::Init_Magnetic_Field_With_Vector_Potential(H, C, vector_potential); + + // Check the results + double const bx_fiducial = -10.0; + double const by_fiducial = 15.0; + double const bz_fiducial = -5.0; + + for (size_t i = 0; i < conserved_vector.size(); i++) { + if (i == 47) { + testing_utilities::Check_Results(bx_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } else if (i == 55) { + testing_utilities::Check_Results(by_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } else if (i == 63) { + testing_utilities::Check_Results(bz_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } else { + testing_utilities::Check_Results(default_fiducial, conserved_vector.at(i), "value at i = " + std::to_string(i)); + } + } +} +#endif // MHD +// ============================================================================= +// End of tests for the mhd::utils::Init_Magnetic_Field_With_Vector_Potential function +// ============================================================================= diff --git a/src/utils/parallel_omp.cpp b/src/utils/parallel_omp.cpp index 90a70c914..1e633ef07 100644 --- a/src/utils/parallel_omp.cpp +++ b/src/utils/parallel_omp.cpp @@ -1,56 +1,58 @@ #ifdef PARALLEL_OMP -#include "../utils/parallel_omp.h" - -void Get_OMP_Grid_Indxs( int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, int *omp_gridIndx_end ){ + #include "../utils/parallel_omp.h" +void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, + int *omp_gridIndx_end) +{ int grid_reminder, n_grid_omp, g_start, 
g_end; grid_reminder = n_grid_cells % n_omp_procs; - n_grid_omp = n_grid_cells / n_omp_procs; + n_grid_omp = n_grid_cells / n_omp_procs; - g_start = 0; + g_start = 0; int counter = 0; - while ( counter < omp_proc_id ){ + while (counter < omp_proc_id) { g_start += n_grid_omp; - if ( counter < grid_reminder ) g_start += 1; + if (counter < grid_reminder) { + g_start += 1; + } counter += 1; } g_end = g_start + n_grid_omp; - if ( omp_proc_id < grid_reminder ) g_end += 1; + if (omp_proc_id < grid_reminder) { + g_end += 1; + } *omp_gridIndx_start = g_start; - *omp_gridIndx_end = g_end; - + *omp_gridIndx_end = g_end; } -#ifdef PARTICLES -void Get_OMP_Particles_Indxs( part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end ){ - + #ifdef PARTICLES +void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, + part_int_t *omp_pIndx_end) +{ part_int_t n_parts_omp, parts_reminder, p_start, p_end; parts_reminder = n_parts_local % n_omp_procs; - n_parts_omp = n_parts_local / n_omp_procs; + n_parts_omp = n_parts_local / n_omp_procs; - p_start = 0; + p_start = 0; int counter = 0; - while ( counter < omp_proc_id ){ + while (counter < omp_proc_id) { p_start += n_parts_omp; - if ( counter < parts_reminder ) p_start += 1; + if (counter < parts_reminder) { + p_start += 1; + } counter += 1; } p_end = p_start + n_parts_omp; - if ( omp_proc_id < parts_reminder ) p_end += 1; + if (omp_proc_id < parts_reminder) { + p_end += 1; + } *omp_pIndx_start = p_start; - *omp_pIndx_end = p_end; - + *omp_pIndx_end = p_end; } -#endif - - - - - - + #endif #endif diff --git a/src/utils/parallel_omp.h b/src/utils/parallel_omp.h index b115dcb76..5e8f6cffa 100644 --- a/src/utils/parallel_omp.h +++ b/src/utils/parallel_omp.h @@ -1,20 +1,24 @@ #ifdef PARALLEL_OMP -#ifndef PARALLEL_OMP_H -#define PARALLEL_OMP_H + #ifndef PARALLEL_OMP_H + #define PARALLEL_OMP_H -#include -#include -#include 
"math.h" -#include "../global/global.h" -#include -#include + #include + #include + #include -void Get_OMP_Grid_Indxs( int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, int *omp_gridIndx_end ); + #include -#ifdef PARTICLES -void Get_OMP_Particles_Indxs( part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, part_int_t *omp_pIndx_end ); -#endif + #include "../global/global.h" + #include "math.h" -#endif +void Get_OMP_Grid_Indxs(int n_grid_cells, int n_omp_procs, int omp_proc_id, int *omp_gridIndx_start, + int *omp_gridIndx_end); + + #ifdef PARTICLES +void Get_OMP_Particles_Indxs(part_int_t n_parts_local, int n_omp_procs, int omp_proc_id, part_int_t *omp_pIndx_start, + part_int_t *omp_pIndx_end); + #endif + + #endif #endif diff --git a/src/utils/prng_utilities.h b/src/utils/prng_utilities.h index 47e628a77..4eacbb0f1 100644 --- a/src/utils/prng_utilities.h +++ b/src/utils/prng_utilities.h @@ -1,39 +1,39 @@ // STL Includes -#include #include +#include #include // Local includes #include "../global/global.h" - #pragma once class ChollaPrngGenerator { -public: - std::mt19937_64 inline static generator; + public: + std::mt19937_64 inline static generator; - ChollaPrngGenerator(struct parameters *P) - { - // If the seed isn't defined in the settings file or argv then generate - // a random seed - if (P->prng_seed == 0) - { - // Since std::random_device isn't guaranteed to be random or - // different for each rank we're going to convert both the base seed - // and MPI rank to strings, concatenated them, then hash the result. 
- // This should give a fairly random seed even if std::random_device - // isn't random - std::string hashString = std::to_string(std::random_device{}()) - + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()) - + std::to_string(static_cast(procID)); - std::size_t hashedSeed = std::hash{}(hashString); - P->prng_seed = static_cast(hashedSeed); - } + ChollaPrngGenerator(struct Parameters *P) + { + // If the seed isn't defined in the settings file or argv then generate + // a random seed + if (P->prng_seed == 0) { + // Since std::random_device isn't guaranteed to be random or + // different for each rank we're going to convert both the base seed + // and MPI rank to strings, concatenate them, then hash the result. + // This should give a fairly random seed even if std::random_device + // isn't random + std::string hashString = std::to_string(std::random_device{}()) +#ifdef MPI_CHOLLA + + std::to_string(static_cast(procID)) +#endif + + std::to_string(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + std::size_t hashedSeed = std::hash{}(hashString); + P->prng_seed = static_cast(hashedSeed); + } - // Initialize the PRNG - generator.seed(P->prng_seed); - }; - ~ChollaPrngGenerator() = default; + // Initialize the PRNG + generator.seed(P->prng_seed); + }; + ~ChollaPrngGenerator() = default; }; diff --git a/src/utils/ran.h b/src/utils/ran.h deleted file mode 100644 index 09a0b8868..000000000 --- a/src/utils/ran.h +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include - -typedef unsigned long long int Ullong; -typedef double Doub; -typedef unsigned int Uint; - -struct Ran { - - Ullong u,v,w; - Ran(Ullong j) : v(4101842887655102017LL), w(1) { - u = j^v; int64(); - v = u; int64(); - w = v; int64(); - } - inline Ullong int64() { - u = u * 2862933555777941757LL + 7046029254386353087LL; - v ^= v >> 17; v ^= v << 31; v ^= v >> 8; - w = 4294957665U*(w & 0xffffffff) + (w >> 32); - Ullong x = u ^ (u << 21); x ^= x >> 35; x ^= x << 
4; - return (x + v) ^ w; - } - inline Doub doub() { return 5.42101086242752217E-20 * int64(); } - inline Uint int32() { return (Uint)int64(); } - -}; diff --git a/src/utils/reduction_utilities.cu b/src/utils/reduction_utilities.cu index 820f27826..6434f560b 100644 --- a/src/utils/reduction_utilities.cu +++ b/src/utils/reduction_utilities.cu @@ -13,48 +13,29 @@ // Local Includes #include "../utils/reduction_utilities.h" -#ifdef CUDA - namespace reduction_utilities - { - // ===================================================================== - __global__ void kernelReduceMax(Real *in, Real* out, size_t N) - { - // Initialize maxVal to the smallest possible number - Real maxVal = -DBL_MAX; - - // Grid stride loop to perform as much of the reduction as possible - for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; - i < N; - i += blockDim.x * gridDim.x) - { - // A transformation could go here - - // Grid stride reduction - maxVal = max(maxVal,in[i]); - } - - // Find the maximum val in the grid and write it to `out`. Note that - // there is no execution/memory barrier after this and so the - // reduced scalar is not available for use in this kernel. The grid - // wide barrier can be accomplished by ending this kernel here and - // then launching a new one or by using cooperative groups. 
If this - // becomes a need it can be added later - gridReduceMax(maxVal, out); - } - // ===================================================================== - - // ===================================================================== - void reductionLaunchParams(uint &numBlocks, uint &threadsPerBlock, uint const &deviceNum) - { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, deviceNum); - - // Divide the total number of allowed threads by the number of - // threads per block - threadsPerBlock = prop.maxThreadsPerBlock; - numBlocks = (prop.maxThreadsPerMultiProcessor * prop.multiProcessorCount) - / threadsPerBlock; - } - // ===================================================================== - }//reduction_utilities -#endif //CUDA \ No newline at end of file +namespace reduction_utilities +{ +// ===================================================================== +__global__ void kernelReduceMax(Real* in, Real* out, size_t N) +{ + // Initialize maxVal to the smallest possible number + Real maxVal = -DBL_MAX; + + // Grid stride loop to perform as much of the reduction as possible + for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x) { + // A transformation could go here + + // Grid stride reduction + maxVal = max(maxVal, in[i]); + } + + // Find the maximum val in the grid and write it to `out`. Note that + // there is no execution/memory barrier after this and so the + // reduced scalar is not available for use in this kernel. The grid + // wide barrier can be accomplished by ending this kernel here and + // then launching a new one or by using cooperative groups. 
If this + // becomes a need it can be added later + gridReduceMax(maxVal, out); +} +// ===================================================================== +} // namespace reduction_utilities diff --git a/src/utils/reduction_utilities.h b/src/utils/reduction_utilities.h index 6935d481b..99191d8c5 100644 --- a/src/utils/reduction_utilities.h +++ b/src/utils/reduction_utilities.h @@ -8,7 +8,7 @@ #pragma once // STL Includes -#include +#include // External Includes @@ -17,177 +17,290 @@ #include "../global/global_cuda.h" #include "../utils/gpu.hpp" -#ifdef CUDA - /*! - * \brief Namespace to contain device resident reduction functions. Includes - * functions and kernels for array reduction, warp level, block level, and - * grid level reductions. - * - */ - namespace reduction_utilities - { - // ===================================================================== - /*! - * \brief Perform a reduction within the warp/wavefront to find the - * maximum value of `val` - * - * \param[in] val The thread local variable to find the maximum of across - * the warp - * \return Real The maximum value of `val` within the warp - */ - __inline__ __device__ Real warpReduceMax(Real val) - { - for (int offset = warpSize/2; offset > 0; offset /= 2) - { - val = max(val, __shfl_down(val, offset)); - } - return val; - } - // ===================================================================== - - // ===================================================================== - /*! 
- * \brief Perform a reduction within the block to find the maximum value - * of `val` - * - * \param[in] val The thread local variable to find the maximum of across - * the block - * \return Real The maximum value of `val` within the block - */ - __inline__ __device__ Real blockReduceMax(Real val) - { - // Shared memory for storing the results of each warp-wise partial - // reduction - __shared__ Real shared[::maxWarpsPerBlock]; - - int lane = threadIdx.x % warpSize; // thread ID within the warp, - int warpId = threadIdx.x / warpSize; // ID of the warp itself - - val = warpReduceMax(val); // Each warp performs partial reduction - - if (lane==0) shared[warpId]=val; // Write reduced value to shared memory - - __syncthreads(); // Wait for all partial reductions - - //read from shared memory only if that warp existed - val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; - - if (warpId==0) val = warpReduceMax(val); //Final reduce within first warp - - return val; - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Perform an atomic reduction to find the maximum value of `val` - * - * \param[out] address The pointer to where to store the reduced scalar - * value in device memory - * \param[in] val The thread local variable to find the maximum of across - * the grid. 
Typically this should be a partial reduction that has - * already been reduced to the block level - */ - __inline__ __device__ double atomicMax_double(double* address, double val) - { - unsigned long long int* address_as_ull = (unsigned long long int*) address; - unsigned long long int old = *address_as_ull, assumed; - // Explanation of loop here: - // https://stackoverflow.com/questions/16077464/atomicadd-for-double-on-gpu - // The loop is to make sure the value at address doesn't change - // between the load at the atomic since the entire operation isn't - // atomic - - // While it appears that this could result in many times more atomic - // operations than required, in practice it's only a handful of - // extra operation even in the worst case. Running with 16,000 - // blocks gives ~8-37 atomics after brief testing - do { - assumed = old; - old = atomicCAS(address_as_ull, - assumed, - __double_as_longlong(fmax(__longlong_as_double(assumed),val))); - } while (assumed != old); - return __longlong_as_double(old); - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Perform a reduction within the grid to find the maximum value - * of `val`. Note that the value of `out` should be set appropriately - * before the kernel launch that uses this function to avoid any - * potential race condition; the `cuda_utilities::setScalarDeviceMemory` - * function exists for this purpose. - * - * \details This function can perform a reduction to find the maximum of - * the thread local variable `val` across the entire grid. It relies on a - * warp-wise reduction using registers followed by a block-wise reduction - * using shared memory, and finally a grid-wise reduction using atomics. 
- * As a result the performance of this function is substantally improved - * by using as many threads per block as possible and as few blocks as - * possible since each block has to perform an atomic operation. To - * accomplish this it is reccommened that you use the - * `reductionLaunchParams` functions to get the optimal number of blocks - * and threads per block to launch rather than relying on Cholla defaults - * and then within the kernel using a grid-stride loop to make sure the - * kernel works with any combination of threads and blocks. Note that - * after this function call you cannot use the reduced value in global - * memory since there is no grid wide sync. You can get around this by - * either launching a second kernel to do the next steps or by using - * cooperative groups to perform a grid wide sync. During it's execution - * it also calls multiple __synchThreads and so cannot be called from - * within any kind of thread guard. - * - * \param[in] val The thread local variable to find the maximum of across - * the grid - * \param[out] out The pointer to where to store the reduced scalar value - * in device memory - */ - __inline__ __device__ void gridReduceMax(Real val, Real* out) - { - // __syncthreads(); // Wait for all threads to calculate val; - - // Reduce the entire block in parallel - val = blockReduceMax(val); - - // Write block level reduced value to the output scalar atomically - if (threadIdx.x == 0) atomicMax_double(out, val); - } - // ===================================================================== - - // ===================================================================== - /*! - * \brief Find the maximum value in the array. Make sure to initialize - * `out` correctly before using this kernel; the - * `cuda_utilities::setScalarDeviceMemory` function exists for this - * purpose. If `in` and `out` are the same array that's ok, all the - * loads are completed before the overwrite occurs. 
- * - * \param[in] in The pointer to the array to reduce in device memory - * \param[out] out The pointer to where to store the reduced scalar - * value in device memory - * \param[in] N The size of the `in` array - */ - __global__ void kernelReduceMax(Real *in, Real* out, size_t N); - // ===================================================================== - - // ===================================================================== - /*! - * \brief Determine the optimal number of blocks and threads per block to - * use when launching a reduction kernel - * - * \param[out] numBlocks The maximum number of blocks that are - * scheduleable by the device in use when each block has the maximum - * number of threads - * \param[out] threadsPerBlock The maximum threads per block supported by - * the device in use - * \param[in] deviceNum optional: which device is being targeted. - * Defaults to zero - */ - void reductionLaunchParams(uint &numBlocks, - uint &threadsPerBlock, - uint const &deviceNum=0); - // ===================================================================== - } // namespace reduction_utilities -#endif //CUDA +/*! + * \brief Namespace to contain device resident reduction functions. Includes + * functions and kernels for array reduction, warp level, block level, and + * grid level reductions. + * + */ +namespace reduction_utilities +{ +// ===================================================================== +/*! 
+ * \brief Perform a reduction within the warp/wavefront to find the + * maximum value of `val` + * + * \param[in] val The thread local variable to find the maximum of across + * the warp + * \return Real The maximum value of `val` within the warp + */ +__inline__ __device__ Real warpReduceMax(Real val) +{ + for (int offset = warpSize / 2; offset > 0; offset /= 2) { + val = max(val, __shfl_down(val, offset)); + } + return val; +} +// ===================================================================== + +// ===================================================================== +/*! + * \brief Perform a reduction within the block to find the maximum value + * of `val` + * + * \param[in] val The thread local variable to find the maximum of across + * the block + * \return Real The maximum value of `val` within the block + */ +__inline__ __device__ Real blockReduceMax(Real val) +{ + // Shared memory for storing the results of each warp-wise partial + // reduction + __shared__ Real shared[::maxWarpsPerBlock]; + + int lane = threadIdx.x % warpSize; // thread ID within the warp, + int warpId = threadIdx.x / warpSize; // ID of the warp itself + + val = warpReduceMax(val); // Each warp performs partial reduction + + if (lane == 0) { + shared[warpId] = val; + } // Write reduced value to shared memory + + __syncthreads(); // Wait for all partial reductions + + // read from shared memory only if that warp existed + val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0; + + if (warpId == 0) { + val = warpReduceMax(val); + } // Final reduce within first warp + + return val; +} +// ===================================================================== + +#ifndef O_HIP +// ===================================================================== +// This section handles the atomics. It is complicated because CUDA +// doesn't currently support atomics with non-integral types. 
+// This code is taken from +// https://github.com/rapidsai/cuml/blob/dc14361ba11c41f7a4e1e6a3625bbadd0f52daf7/cpp/src_prims/stats/minmax.cuh +// with slight tweaks for our use case. +// ===================================================================== +/*! + * \brief Do a device side bit cast + * + * \tparam To The output type + * \tparam From The input type + * \param from The input value + * \return To The bit cast version of From as type To + */ +template +__device__ constexpr To bit_cast(const From& from) noexcept +{ + // TODO: replace with `std::bit_cast` once we adopt C++20 or libcu++ adds it + To to{}; + static_assert(sizeof(To) == sizeof(From)); + memcpy(&to, &from, sizeof(To)); + return to; +} + +/*! + * \brief Encode a float as an int + * + * \param val The float to encode + * \return int The encoded int + */ +inline __device__ int encode(float val) +{ + int i = bit_cast(val); + return i >= 0 ? i : (1 << 31) | ~i; // NOLINT(hicpp-signed-bitwise) +} + +/*! + * \brief Encode a double as a long long int + * + * \param val The double to encode + * \return long long The encoded long long int + */ +inline __device__ long long encode(double val) +{ + auto i = bit_cast(val); + return i >= 0 ? i : (1ULL << 63) | ~i; // NOLINT(hicpp-signed-bitwise) +} + +/*! + * \brief Decodes an int as a float + * + * \param val The int to decode + * \return float The decoded float + */ +inline __device__ float decode(int val) +{ + if (val < 0) { + val = (1 << 31) | ~val; // NOLINT(hicpp-signed-bitwise) + } + return bit_cast(val); +} + +/*! + * \brief Decodes a long long int as a double + * + * \param val The long long to decode + * \return double The decoded double + */ +inline __device__ double decode(long long val) +{ + if (val < 0) { + val = (1ULL << 63) | ~val; // NOLINT(hicpp-signed-bitwise) + } + return bit_cast(val); +} +#endif // O_HIP +/*! 
+ * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ float atomicMaxBits(float* address, float val) +{ +#ifdef O_HIP + return atomicMax(address, val); +#else // O_HIP + int old = atomicMax((int*)address, encode(val)); + return decode(old); +#endif // O_HIP +} + +/*! + * \brief Perform an atomic reduction to find the maximum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the maximum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ double atomicMaxBits(double* address, double val) +{ +#ifdef O_HIP + return atomicMax(address, val); +#else // O_HIP + long long old = atomicMax((long long*)address, encode(val)); + return decode(old); +#endif // O_HIP +} + +/*! + * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ float atomicMinBits(float* address, float val) +{ +#ifdef O_HIP + return atomicMin(address, val); +#else // O_HIP + int old = atomicMin((int*)address, encode(val)); + return decode(old); +#endif // O_HIP +} + +/*! 
+ * \brief Perform an atomic reduction to find the minimum value of `val` + * + * \param[out] address The pointer to where to store the reduced scalar + * value in device memory + * \param[in] val The thread local variable to find the minimum of across + * the grid. Typically this should be a partial reduction that has + * already been reduced to the block level + */ +inline __device__ double atomicMinBits(double* address, double val) +{ +#ifdef O_HIP + return atomicMin(address, val); +#else // O_HIP + long long old = atomicMin((long long*)address, encode(val)); + return decode(old); +#endif // O_HIP +} +// ===================================================================== + +// ===================================================================== +/*! + * \brief Perform a reduction within the grid to find the maximum value + * of `val`. Note that the value of `out` should be set appropriately + * before the kernel launch that uses this function to avoid any + * potential race condition; the `cuda_utilities::setScalarDeviceMemory` + * function exists for this purpose. + * of `val`. Note that the value of `out` should be set appropriately + * before the kernel launch that uses this function to avoid any + * potential race condition; the `cuda_utilities::setScalarDeviceMemory` + * function exists for this purpose. + * + * \details This function can perform a reduction to find the maximum of + * the thread local variable `val` across the entire grid. It relies on a + * warp-wise reduction using registers followed by a block-wise reduction + * using shared memory, and finally a grid-wise reduction using atomics. + * As a result the performance of this function is substantially improved + * by using as many threads per block as possible and as few blocks as + * possible since each block has to perform an atomic operation. 
To + * accomplish this it is recommended that you use the + * `AutomaticLaunchParams` functions to get the optimal number of blocks + * and threads per block to launch rather than relying on Cholla defaults + * and then within the kernel using a grid-stride loop to make sure the + * kernel works with any combination of threads and blocks. Note that + * after this function call you cannot use the reduced value in global + * memory since there is no grid wide sync. You can get around this by + * either launching a second kernel to do the next steps or by using + * cooperative groups to perform a grid wide sync. During its execution + * it also calls multiple __syncthreads and so cannot be called from + * within any kind of thread guard. + * + * \param[in] val The thread local variable to find the maximum of across + * the grid + * \param[out] out The pointer to where to store the reduced scalar value + * in device memory + */ +__inline__ __device__ void gridReduceMax(Real val, Real* out) +{ + // Reduce the entire block in parallel + val = blockReduceMax(val); + + // Write block level reduced value to the output scalar atomically + if (threadIdx.x == 0) { + atomicMaxBits(out, val); + } +} +// ===================================================================== + +// ===================================================================== +/*! + * \brief Find the maximum value in the array. Make sure to initialize + * `out` correctly before using this kernel; the + * `cuda_utilities::setScalarDeviceMemory` function exists for this + * purpose. If `in` and `out` are the same array that's ok, all the + * loads are completed before the overwrite occurs. + * \brief Find the maximum value in the array. Make sure to initialize + * `out` correctly before using this kernel; the + * `cuda_utilities::setScalarDeviceMemory` function exists for this + * purpose. If `in` and `out` are the same array that's ok, all the + * loads are completed before the overwrite occurs. 
+ * + * \param[in] in The pointer to the array to reduce in device memory + * \param[out] out The pointer to where to store the reduced scalar + * value in device memory + * \param[in] N The size of the `in` array + */ +__global__ void kernelReduceMax(Real* in, Real* out, size_t N); +// ===================================================================== +} // namespace reduction_utilities diff --git a/src/utils/reduction_utilities_tests.cu b/src/utils/reduction_utilities_tests.cu index 2314b33be..5dd18c197 100644 --- a/src/utils/reduction_utilities_tests.cu +++ b/src/utils/reduction_utilities_tests.cu @@ -1,95 +1,69 @@ /*! * \file reduction_utilities_tests.cpp * \author Robert 'Bob' Caddy (rvc@pitt.edu) - * \brief Tests for the contents of reduction_utilities.h and reduction_utilities.cpp + * \brief Tests for the contents of reduction_utilities.h and + * reduction_utilities.cpp * */ // STL Includes -#include -#include #include #include +#include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local Includes -#include "../utils/testing_utilities.h" -#include "../utils/reduction_utilities.h" #include "../global/global.h" - - +#include "../utils/DeviceVector.h" +#include "../utils/cuda_utilities.h" +#include "../utils/reduction_utilities.h" +#include "../utils/testing_utilities.h" // ============================================================================= // Tests for divergence max reduction // ============================================================================= TEST(tALLKernelReduceMax, CorrectInputExpectCorrectOutput) { - // Launch parameters - // ================= - uint numBlocks, threadsPerBlock; - reduction_utilities::reductionLaunchParams(numBlocks, threadsPerBlock); - - // Grid Parameters & testing parameters - // ==================================== - size_t const gridSize = 64; - size_t const size = std::pow(gridSize, 3);; - Real 
const maxValue = 4; - std::vector host_grid(size); - Real host_max = -DBL_MAX; - - // Fill grid with random values and assign maximum value - std::mt19937 prng(1); - std::uniform_real_distribution doubleRand(-std::abs(maxValue)-1, std::abs(maxValue) - 1); - std::uniform_int_distribution intRand(0, host_grid.size()-1); - for (size_t i = 0; i < host_grid.size(); i++) - { - host_grid.at(i) = doubleRand(prng); - } - host_grid.at(intRand(prng)) = maxValue; - - - // Allocating and copying to device - // ================================ - Real *dev_grid; - CudaSafeCall(cudaMalloc(&dev_grid, host_grid.size() * sizeof(Real))); - CudaSafeCall(cudaMemcpy(dev_grid, host_grid.data(), host_grid.size() * sizeof(Real), cudaMemcpyHostToDevice)); - - Real *dev_max_array; - CudaSafeCall(cudaMalloc(&dev_max_array, numBlocks*sizeof(Real))); - // Sets all bytes to 0. - cudaMemset(dev_max_array,0,numBlocks*sizeof(Real)); - - Real host_max_array[numBlocks]; - //Real *host_max_array = (Real *) malloc(numBlocks*sizeof(Real)); - //CudaSafeCall( cudaHostAlloc(&host_max_array, numBlocks*sizeof(Real), cudaHostAllocDefault) ); - - - // Do the reduction - // ================ - hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, numBlocks, threadsPerBlock, 0, 0, dev_grid, dev_max_array, host_grid.size()); - CudaCheckError(); - - - // Copy back and sync - // ================== - CudaSafeCall(cudaMemcpy(&host_max_array, dev_max_array, numBlocks*sizeof(Real), cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); - - for (int i = 0; i < numBlocks; i++) - { - host_max = fmax(host_max,host_max_array[i]); - } - - //free(host_max_array); - - cudaFree(dev_max_array); - - cudaFree(dev_grid); - - // Perform comparison - testingUtilities::checkResults(maxValue, host_max, "maximum value found"); + // Launch parameters + // ================= + cuda_utilities::AutomaticLaunchParams static const launchParams(reduction_utilities::kernelReduceMax); + + // Grid Parameters & testing parameters + // 
==================================== + size_t const gridSize = 64; + size_t const size = std::pow(gridSize, 3); + ; + Real const maxValue = 4; + std::vector host_grid(size); + + // Fill grid with random values and assign maximum value + std::mt19937 prng(1); + std::uniform_real_distribution doubleRand(-std::abs(maxValue) - 1, std::abs(maxValue) - 1); + std::uniform_int_distribution intRand(0, host_grid.size() - 1); + for (Real& host_data : host_grid) { + host_data = doubleRand(prng); + } + host_grid.at(intRand(prng)) = maxValue; + + // Allocating and copying to device + // ================================ + cuda_utilities::DeviceVector dev_grid(host_grid.size()); + dev_grid.cpyHostToDevice(host_grid); + + cuda_utilities::DeviceVector static dev_max(1); + dev_max.assign(std::numeric_limits::lowest()); + + // Do the reduction + // ================ + hipLaunchKernelGGL(reduction_utilities::kernelReduceMax, launchParams.numBlocks, launchParams.threadsPerBlock, 0, 0, + dev_grid.data(), dev_max.data(), host_grid.size()); + GPU_Error_Check(); + + // Perform comparison + testing_utilities::Check_Results(maxValue, dev_max.at(0), "maximum value found"); } // ============================================================================= // Tests for divergence max reduction diff --git a/src/utils/testing_utilities.cpp b/src/utils/testing_utilities.cpp index 9b8bee948..7b1055ecd 100644 --- a/src/utils/testing_utilities.cpp +++ b/src/utils/testing_utilities.cpp @@ -6,180 +6,120 @@ */ // STL includes -#include #include #include #include +#include // External Includes -#include // Include GoogleTest and related libraries/headers +#include // Include GoogleTest and related libraries/headers // Local includes -#include "../utils/testing_utilities.h" // Include the header file -#include "../system_tests/system_tester.h" // provide systemTest class +#include "../system_tests/system_tester.h" // provide systemTest class +#include "../utils/testing_utilities.h" // Include the header 
file -namespace testingUtilities +namespace testing_utilities { - // ========================================================================= - int64_t ulpsDistanceDbl(double const &a, double const &b) - { - // Save work if the floats are equal. - // Also handles +0 == -0 - if (a == b) return 0; - - const auto maxInt = std::numeric_limits::max(); - - // If either one is NaN then they are not equal, max distance. - if (std::isnan(a) || std::isnan(b)) return maxInt; +// ========================================================================= +int64_t ulpsDistanceDbl(double const &a, double const &b) +{ + // Save work if the floats are equal. + // Also handles +0 == -0 + if (a == b) { + return 0; + } - // If one's infinite and they're not equal, max distance. - if (std::isinf(a) || std::isinf(b)) return maxInt; + const auto maxInt = std::numeric_limits::max(); - int64_t ia, ib; - std::memcpy(&ia, &a, sizeof(double)); - std::memcpy(&ib, &b, sizeof(double)); + // If either one is NaN then they are not equal, max distance. + if (std::isnan(a) || std::isnan(b)) { + return maxInt; + } - // Don't compare differently-signed floats. - if ((ia < 0) != (ib < 0)) return maxInt; + // If one's infinite and they're not equal, max distance. + if (std::isinf(a) || std::isinf(b)) { + return maxInt; + } - // Return the absolute value of the distance in ULPs. 
- int64_t distance = ia - ib; - if (distance < 0) distance = -distance; + int64_t ia, ib; + std::memcpy(&ia, &a, sizeof(double)); + std::memcpy(&ib, &b, sizeof(double)); - return distance; - } - // ========================================================================= - - // ========================================================================= - bool nearlyEqualDbl(double const &a, - double const &b, - double &absoluteDiff, - int64_t &ulpsDiff, - double const &fixedEpsilon, // = 1E-14 by default - int const &ulpsEpsilon) // = 4 by default - { - // Compute differences - ulpsDiff = ulpsDistanceDbl(a, b); - absoluteDiff = std::abs(a - b); - - // Perform the ULP check which is for numbers far from zero - if (ulpsDiff <= ulpsEpsilon) - { - return true; - } - // Perform the absolute check which is for numbers near zero - else if (absoluteDiff <= fixedEpsilon) - { - return true; - } - // if none of the checks have passed indicate test failure - else - { - return false; - } - } - // ========================================================================= - - // ========================================================================= - void checkResults(double fiducialNumber, - double testNumber, - std::string outString, - double fixedEpsilon, - int ulpsEpsilon) - { - // Check for equality and if not equal return difference - double absoluteDiff; - int64_t ulpsDiff; - bool areEqual; - - if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff); - } - else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon); - } - else - { - areEqual = testingUtilities::nearlyEqualDbl(fiducialNumber, - testNumber, - absoluteDiff, - ulpsDiff, - fixedEpsilon, - ulpsEpsilon); - } - - EXPECT_TRUE(areEqual) - << "Difference in " << outString << std::endl - << "The fiducial 
value is: " << fiducialNumber << std::endl - << "The test value is: " << testNumber << std::endl - << "The absolute difference is: " << absoluteDiff << std::endl - << "The ULP difference is: " << ulpsDiff << std::endl; - } - // ========================================================================= - - void wrapperEqual(int i, int j, int k, std::string dataSetName, - double test_value, double fid_value, double fixedEpsilon=5.0E-12) { - - std::string outString; - outString += dataSetName; - outString += " dataset at ["; - outString += i; - outString += ","; - outString += j; - outString += ","; - outString += k; - outString += "]"; - - checkResults(fid_value,test_value,outString,fixedEpsilon); + // Don't compare differently-signed floats. + if ((ia < 0) != (ib < 0)) { + return maxInt; } - void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value) { - std::vector testDims(3,1); - std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); - for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - - wrapperEqual(i,j,k,dataSetName,testData.at(index),value); - } - } - } + // Return the absolute value of the distance in ULPs. 
+ int64_t distance = ia - ib; + if (distance < 0) { + distance = -distance; } - void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, - double constant, double amplitude, - double kx, double ky, double kz, double phase, double tolerance) - { - std::vector testDims(3,1); - std::vector testData = testObject.loadTestFieldData(dataSetName,testDims); - for (size_t i = 0; i < testDims[0]; i++) - { - for (size_t j = 0; j < testDims[1]; j++) - { - for (size_t k = 0; k < testDims[2]; k++) - { - double value = constant + amplitude*std::sin(kx*i+ky*j+kz*k+phase); - size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; - wrapperEqual(i,j,k,dataSetName,testData.at(index),value,tolerance); - } - } - } - } + return distance; +} +// ========================================================================= +// ========================================================================= +bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff, + double const &fixedEpsilon, // = 1E-14 by default + int64_t const &ulpsEpsilon) // = 4 by default +{ + // Compute differences + ulpsDiff = ulpsDistanceDbl(a, b); + absoluteDiff = std::abs(a - b); + // Perform the ULP check which is for numbers far from zero and perform the absolute check which is for numbers near + // zero + return ulpsDiff <= ulpsEpsilon or absoluteDiff <= fixedEpsilon; +} +// ========================================================================= +void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, + double fixedEpsilon = 5.0E-12) +{ + std::string outString; + outString += dataSetName; + outString += " dataset at ["; + outString += std::to_string(i); + outString += ","; + outString += std::to_string(j); + outString += ","; + outString += std::to_string(k); + outString += "]"; + + ASSERT_NO_FATAL_FAILURE(Check_Results<1>(fid_value, test_value, outString, fixedEpsilon)); +} +void 
analyticConstant(system_test::SystemTestRunner testObject, std::string const &dataSetName, double value) +{ + std::vector testDims(3, 1); + std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); + for (size_t i = 0; i < testDims[0]; i++) { + for (size_t j = 0; j < testDims[1]; j++) { + for (size_t k = 0; k < testDims[2]; k++) { + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i, j, k, dataSetName, testData.at(index), value)); + } + } + } } + +void analyticSine(system_test::SystemTestRunner testObject, std::string const &dataSetName, double constant, + double amplitude, double kx, double ky, double kz, double phase, double tolerance) +{ + std::vector testDims(3, 1); + std::vector testData = testObject.loadTestFieldData(dataSetName, testDims); + for (size_t i = 0; i < testDims[0]; i++) { + for (size_t j = 0; j < testDims[1]; j++) { + for (size_t k = 0; k < testDims[2]; k++) { + double value = constant + amplitude * std::sin(kx * i + ky * j + kz * k + phase); + size_t index = (i * testDims[1] * testDims[2]) + (j * testDims[2]) + k; + ASSERT_NO_FATAL_FAILURE(wrapperEqual(i, j, k, dataSetName, testData.at(index), value, tolerance)); + } + } + } +} + +} // namespace testing_utilities diff --git a/src/utils/testing_utilities.h b/src/utils/testing_utilities.h index 927a61f28..31258e676 100644 --- a/src/utils/testing_utilities.h +++ b/src/utils/testing_utilities.h @@ -9,8 +9,12 @@ #pragma once // STL includes +#include +#include +#include #include -#include "../system_tests/system_tester.h" // provide systemTest class + +#include "../system_tests/system_tester.h" // provide systemTest class // ============================================================================= // NOTE: Global variables are declared as extern at the end of this file @@ -22,143 +26,176 @@ * considered compatible with CUDA/HIP. 
* */ -namespace testingUtilities +namespace testing_utilities { - // ========================================================================= - /*! - * \brief Compute the Units in the Last Place (ULP) difference between two doubles - * - * \details This function is modified from - * [Comparing Floating-Point Numbers Is Tricky by Matt Kline](https://bitbashing.io/comparing-floats.html) - * which is in turn based on - * [Comparing Floating Point Numbers, 2012 Edition by Bruce Dawson](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/). - * The latter seems to be the bible of floating point comparison and is the - * basis of Googletests ASSERT_DOUBLE_EQ assertion. - * - * This particular function checks that the two numbers if the numbers are - * perfectly equal, +0, -0, Nan, inf, or differently signed then it computes - * the ULP difference between them are returns it - * - * \param[in] a The first double you wish to compare. Order doesn't matter. - * \param[in] b The second double you wish to compare. Order doesn't matter. - * \return int64_t The ULP distance between a and b. - */ - int64_t ulpsDistanceDbl(double const &a, double const &b); - // ========================================================================= - - // ========================================================================= - /*! - * \brief Check if two doubles are nearly equal. - * - * \details This function checks if two doubles are "nearly equal" which is - * defined as either: A) the absolute difference between them is less than - * the fixedEpsilon argument or B) the units in the last place (ULP) - * difference is less than the ulpsEpsilon argument. Both of the epsilon - * arguments have default values which generally should not need to be - * changed. - * - * Why does fixedEpsilon default to 1E-14? Running the Sod shock tube when - * Cholla was compiled with GCC 9.3.0 vs. 
XL 16.1.1-10 on Summit lead to - * absolute differences in the results up to 1.77636E-15. A priori we chose - * that a difference between two numbers that was less than one order of - * magnitude greater than the difference between compilers would be - * considered "equal". I.e. since the maximum absolute error between the GCC - * and XL compilers was ~1.7E-15 our allowed margin of error should be - * ~1E-14. - * - * Why does ulpsEpsilon default to 4? Repeating the test above I computed - * the largest ULP difference that wasn't caught by the absolute difference - * requirement of 1E-14. It turns out that there were no uncaught - * differences at all so I kept ulpsEpsilon at 4 since that's the Googletest - * default for their floating point assertions - * - * \param[in] a The first double you wish to compare. Order doesn't matter. - * \param[in] b The first double you wish to compare. Order doesn't matter. - * \param[out] absoluteDiff The absolute difference between the numbers. - * Only returned if the numbers are not equal. If the numbers are equal then - * behaviour is undefined - * \param[out] ulpsDiff The ULP difference between the numbers. - * Only returned if the numbers are not equal. If the numbers are equal then - * behaviour is undefined - * \param[in] fixedEpsilon The allowed difference in real numbers. Defaults - * to 1E-14 - * \param[in] ulpsEpsilon The allowed difference of ULPs. 
Defaults to 4 - * \return bool Whether or not the numbers are equal - */ - bool nearlyEqualDbl(double const &a, - double const &b, - double &absoluteDiff, - int64_t &ulpsDiff, - double const &fixedEpsilon = 1E-14, - int const &ulpsEpsilon = 4); - // ========================================================================= - - void wrapperEqual(int i, int j, int k, std::string dataSetName, double test_value, double fid_value, double fixedEpsilon); - - void analyticConstant(systemTest::SystemTestRunner testObject, std::string dataSetName, double value); - - void analyticSine(systemTest::SystemTestRunner testObject, std::string dataSetName, - double constant, double amplitude, double kx, double ky, double kz, - double phase, double tolerance); - - // ========================================================================= - /*! - * \brief A simple function to compare two doubles with the nearlyEqualDbl - * function, perform a GTest assert on the result, and print out the values - * - * \param[in] fiducialNumber The fiducial number to test against - * \param[in] testNumber The unverified number to test - * \param[in] outString A string to be printed in the first line of the output - * message. Format will be "Difference in outString" - * \param[in] fixedEpsilon The fixed epsilon to use in the comparison. - * Negative values are ignored and default behaviour is used - * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. Negative - * values are ignored and default behaviour is used - */ - void checkResults(double fiducialNumber, - double testNumber, - std::string outString, - double fixedEpsilon = -999, - int ulpsEpsilon = -999); - // ========================================================================= - - // ========================================================================= - /*! - * \brief Holds a single std::string that's intended to be read only and - * global. 
Use for storing the path of the root directory of Cholla - * - */ - class GlobalString - { - private: - /// The path variable - std::string _string; - public: - /*! - * \brief Initializes the _path member variable. Should only be called - * once in main - * - * \param inputPath The path to be store in _path - */ - void init(std::string const &inputPath) {_string = inputPath;}; - - /*! - * \brief Get the String object - * - * \return std::string The string variable - */ - std::string getString() {return _string;}; - GlobalString() = default; - ~GlobalString() = default; - }; - // ========================================================================= +// ========================================================================= +/*! + * \brief Compute the Units in the Last Place (ULP) difference between two + * doubles + * + * \details This function is modified from + * [Comparing Floating-Point Numbers Is Tricky by Matt + * Kline](https://bitbashing.io/comparing-floats.html) which is in turn based on + * [Comparing Floating Point Numbers, 2012 Edition by Bruce + * Dawson](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/). + * The latter seems to be the bible of floating point comparison and is the + * basis of Googletests ASSERT_DOUBLE_EQ assertion. + * + * This particular function checks that the two numbers if the numbers are + * perfectly equal, +0, -0, Nan, inf, or differently signed then it computes + * the ULP difference between them are returns it + * + * \param[in] a The first double you wish to compare. Order doesn't matter. + * \param[in] b The second double you wish to compare. Order doesn't matter. + * \return int64_t The ULP distance between a and b. + */ +int64_t ulpsDistanceDbl(double const &a, double const &b); +// ========================================================================= + +// ========================================================================= +/*! 
+ * \brief Check if two doubles are nearly equal. + * + * \details This function checks if two doubles are "nearly equal" which is + * defined as either: A) the absolute difference between them is no greater than + * the fixedEpsilon argument or B) the units in the last place (ULP) + * difference is no greater than the ulpsEpsilon argument. Both of the epsilon + * arguments have default values which generally should not need to be + * changed. + * + * Why does fixedEpsilon default to 1E-14? Running the Sod shock tube when + * Cholla was compiled with GCC 9.3.0 vs. XL 16.1.1-10 on Summit led to + * absolute differences in the results up to 1.77636E-15. A priori we chose + * that a difference between two numbers that was less than one order of + * magnitude greater than the difference between compilers would be + * considered "equal". I.e. since the maximum absolute error between the GCC + * and XL compilers was ~1.7E-15 our allowed margin of error should be + * ~1E-14. + * + * Why does ulpsEpsilon default to 4? Repeating the test above I computed + * the largest ULP difference that wasn't caught by the absolute difference + * requirement of 1E-14. It turns out that there were no uncaught + * differences at all so I kept ulpsEpsilon at 4 since that's the Googletest + * default for their floating point assertions + * + * \param[in] a The first double you wish to compare. Order doesn't matter. + * \param[in] b The second double you wish to compare. Order doesn't matter. + * \param[out] absoluteDiff The absolute difference between the numbers. + * Always computed and written, whether or not the numbers compare as + * nearly equal + * \param[out] ulpsDiff The ULP difference between the numbers. + * Always computed and written, whether or not the numbers compare as + * nearly equal + * \param[in] fixedEpsilon The allowed difference in real numbers. Defaults + * to 1E-14 + * \param[in] ulpsEpsilon The allowed difference of ULPs. 
Defaults to 4 + * \return bool Whether or not the numbers are equal + */ +bool nearlyEqualDbl(double const &a, double const &b, double &absoluteDiff, int64_t &ulpsDiff, + double const &fixedEpsilon = 1E-14, int64_t const &ulpsEpsilon = 4); +// ========================================================================= + +void wrapperEqual(int i, int j, int k, std::string const &dataSetName, double test_value, double fid_value, + double fixedEpsilon); + +void analyticConstant(system_test::SystemTestRunner testObject, std::string const &dataSetName, double value); + +void analyticSine(system_test::SystemTestRunner testObject, std::string const &dataSetName, double constant, + double amplitude, double kx, double ky, double kz, double phase, double tolerance); + +// ========================================================================= +/*! + * \brief A simple function to compare two doubles with the nearlyEqualDbl + * function, perform a GTest assert on the result, and print out the values + * + * \tparam checkType The type of GTest assertion to use. "0" for an + * "EXPECT" and "1" for an "ASSERT" + * \param[in] fiducialNumber The fiducial number to test against + * \param[in] testNumber The unverified number to test + * \param[in] outString A string to be printed in the first line of the output + * message. Format will be "Difference in outString" + * \param[in] fixedEpsilon The fixed epsilon to use in the comparison. + * Negative values are ignored and default behaviour is used + * \param[in] ulpsEpsilon The ULP epsilon to use in the comparison. 
Negative + * values are ignored and default behaviour is used + */ +template +void Check_Results(double fiducialNumber, double testNumber, std::string const &outString, double fixedEpsilon = -999, + int64_t ulpsEpsilon = -999) +{ + // Compare the values; nearlyEqualDbl also reports the absolute and ULP differences + double absoluteDiff; + int64_t ulpsDiff; + bool areEqual; + + if ((fixedEpsilon < 0) and (ulpsEpsilon < 0)) { + areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff); + } else if ((fixedEpsilon > 0) and (ulpsEpsilon < 0)) { + areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon); + } else { + areEqual = testing_utilities::nearlyEqualDbl(fiducialNumber, testNumber, absoluteDiff, ulpsDiff, fixedEpsilon, + ulpsEpsilon); + } + + std::stringstream outputMessage; + outputMessage << std::setprecision(std::numeric_limits::max_digits10) << "Difference in " << outString + << std::endl + << "The fiducial value is: " << fiducialNumber << std::endl + << "The test value is: " << testNumber << std::endl + << "The absolute difference is: " << absoluteDiff << std::endl + << "The ULP difference is: " << ulpsDiff << std::endl; + + if (checkType == 0) { + EXPECT_TRUE(areEqual) << outputMessage.str(); + } else if (checkType == 1) { + ASSERT_TRUE(areEqual) << outputMessage.str(); + } else { + throw std::runtime_error( + "Incorrect template argument passed to " + "Check_Results. Options are 0 and 1 but " + + std::to_string(checkType) + " was passed"); + } } +// ========================================================================= + +// ========================================================================= +/*! + * \brief Holds a single std::string that's intended to be read only and + * global. Use for storing the path of the root directory of Cholla + * + */ +class GlobalString +{ + private: + /// The stored string + std::string _string; + + public: + /*! 
+ * \brief Initializes the _string member variable. Should only be called + * once in main + * + * \param inputPath The path to be stored in _string + */ + void init(std::string const &inputPath) { _string = inputPath; }; + + /*! + * \brief Get the String object + * + * \return std::string The string variable + */ + std::string getString() { return _string; }; + GlobalString() = default; + ~GlobalString() = default; +}; +// ========================================================================= +} // namespace testing_utilities // Declare the global string variables so everything that imports this file // has access to them -extern testingUtilities::GlobalString globalChollaRoot; -extern testingUtilities::GlobalString globalChollaBuild; -extern testingUtilities::GlobalString globalChollaMachine; -extern testingUtilities::GlobalString globalMpiLauncher; +extern testing_utilities::GlobalString globalChollaRoot; +extern testing_utilities::GlobalString globalChollaBuild; +extern testing_utilities::GlobalString globalChollaMachine; +extern testing_utilities::GlobalString globalMpiLauncher; extern bool globalRunCholla; extern bool globalCompareSystemTestResults; diff --git a/src/utils/timing_functions.cpp b/src/utils/timing_functions.cpp index 895c12806..a0382e43f 100644 --- a/src/utils/timing_functions.cpp +++ b/src/utils/timing_functions.cpp @@ -1,47 +1,88 @@ - +#include "../utils/timing_functions.h" #ifdef CPU_TIME -#include "../utils/timing_functions.h" -#include "../io/io.h" -#include -#include -#include + #include + #include + #include + #include -#ifdef MPI_CHOLLA -#include "../mpi/mpi_routines.h" -#endif + #include "../global/global.h" + #include "../global/global_cuda.h" + #include "../io/io.h" + + #ifdef MPI_CHOLLA + #include "../mpi/mpi_routines.h" + #endif -void OneTime::Start(){ - if (inactive) return; - time_start = get_time(); +void OneTime::Start() +{ + cudaDeviceSynchronize(); + if (inactive) { + return; + } + time_start = Get_Time(); } -void 
OneTime::Subtract(Real time_to_subtract){ - // Add the time_to_substract to the start time, that way the time_end - time_start is reduced by time_to_substract +void OneTime::Subtract(Real time_to_subtract) +{ + // Add the time_to_subtract to the start time, that way the time_end - + // time_start is reduced by time_to_subtract time_start += time_to_subtract; } -void OneTime::End(){ - if (inactive) return; - Real time_end = get_time(); - Real time = (time_end - time_start)*1000; +void OneTime::End(bool const print_high_values) +{ + cudaDeviceSynchronize(); + if (inactive) { + return; + } + Real time_end = Get_Time(); + Real time = (time_end - time_start) * 1000; -#ifdef MPI_CHOLLA + #ifdef MPI_CHOLLA t_min = ReduceRealMin(time); t_max = ReduceRealMax(time); t_avg = ReduceRealAvg(time); -#else + #else t_min = time; t_max = time; t_avg = time; -#endif - if (n_steps > 0) t_all += t_max; + #endif + if (n_steps > 0) { + t_all += t_max; + } n_steps++; -} + #ifdef MPI_CHOLLA + // Warn if this rank is unusually slow; n_steps was just incremented, so "> 1" skips the first step like t_all does + if ((time >= 1.1 * t_avg) and (n_steps > 1) and print_high_values) { + // Get node ID + std::string node_id(MPI_MAX_PROCESSOR_NAME, ' '); + int length; + MPI_Get_processor_name(node_id.data(), &length); + node_id.resize(length); + + // Get GPU ID, then trim the NUL terminator and the space padding left after it + std::string gpu_id(MPI_MAX_PROCESSOR_NAME, ' '); + int device; + GPU_Error_Check(cudaGetDevice(&device)); + GPU_Error_Check(cudaDeviceGetPCIBusId(gpu_id.data(), gpu_id.size(), device)); + gpu_id.erase( + std::find_if(gpu_id.rbegin(), gpu_id.rend(), [](unsigned char ch) { return ch != '\0' and !std::isspace(ch); }).base(), + gpu_id.end()); + + std::cerr << "WARNING: Rank took longer than expected to execute." 
<< std::endl + << " Node Time: " << time << std::endl + << " Avg Time: " << t_avg << std::endl + << " Node ID: " << node_id << std::endl + << " GPU PCI Bus ID: " << gpu_id << std::endl; + } + #endif // MPI_CHOLLA +} -void OneTime::RecordTime( Real time ){ - time *= 1000; //Convert from secs to ms +void OneTime::RecordTime(Real time) +{ + time *= 1000; // Convert from secs to ms #ifdef MPI_CHOLLA t_min = ReduceRealMin(time); t_max = ReduceRealMax(time); @@ -51,27 +92,30 @@ void OneTime::RecordTime( Real time ){ t_max = time; t_avg = time; #endif - if (n_steps > 0) t_all += t_max; + if (n_steps > 0) { + t_all += t_max; + } n_steps++; } - -void OneTime::PrintStep(){ +void OneTime::PrintStep() +{ chprintf(" Time %-19s min: %9.4f max: %9.4f avg: %9.4f ms\n", name, t_min, t_max, t_avg); } -void OneTime::PrintAverage(){ - if (n_steps > 1) chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all/(n_steps-1)); -} - -void OneTime::PrintAll(){ - chprintf(" Time %-19s all: %9.4f ms\n", name, t_all); +void OneTime::PrintAverage() +{ + if (n_steps > 1) { + chprintf(" Time %-19s avg: %9.4f ms\n", name, t_all / (n_steps - 1)); + } } -Time::Time( void ){} +void OneTime::PrintAll() { chprintf(" Time %-19s all: %9.4f ms\n", name, t_all); } -void Time::Initialize(){ +Time::Time(void) {} +void Time::Initialize() +{ n_steps = 0; // Add or remove timers by editing this list, keep TOTAL at the end @@ -79,62 +123,72 @@ void Time::Initialize(){ // add Timer.NAME.Start() and Timer.NAME.End() where appropriate. 
onetimes = { - #ifdef PARTICLES - &(Calc_dt = OneTime("Calc_dt")), - #endif - &(Hydro = OneTime("Hydro")), - &(Boundaries = OneTime("Boundaries")), - #ifdef GRAVITY - &(Grav_Potential = OneTime("Grav_Potential")), - &(Pot_Boundaries = OneTime("Pot_Boundaries")), - #endif - #ifdef PARTICLES - &(Part_Density = OneTime("Part_Density")), - &(Part_Boundaries = OneTime("Part_Boundaries")), - &(Part_Dens_Transf = OneTime("Part_Dens_Transf")), - &(Advance_Part_1 = OneTime("Advance_Part_1")), - &(Advance_Part_2 = OneTime("Advance_Part_2")), - #endif - #ifdef COOLING_GRACKLE - &(Cooling = OneTime("Cooling")), - #endif - #ifdef CHEMISTRY_GPU - &(Chemistry = OneTime("Chemistry")), + #ifdef PARTICLES + &(Calc_dt = OneTime("Calc_dt")), + #endif + &(Hydro_Integrator = OneTime("Hydro_Integrator")), + &(Hydro = OneTime("Hydro")), + &(Boundaries = OneTime("Boundaries")), + #ifdef GRAVITY + &(Grav_Potential = OneTime("Grav_Potential")), + &(Pot_Boundaries = OneTime("Pot_Boundaries")), + #endif + #ifdef PARTICLES + &(Part_Density = OneTime("Part_Density")), + &(Part_Boundaries = OneTime("Part_Boundaries")), + &(Part_Dens_Transf = OneTime("Part_Dens_Transf")), + &(Advance_Part_1 = OneTime("Advance_Part_1")), + &(Advance_Part_2 = OneTime("Advance_Part_2")), + #endif + #ifdef COOLING_GPU + &(Cooling_GPU = OneTime("Cooling_GPU")), + #endif + #ifdef COOLING_GRACKLE + &(Cooling_Grackle = OneTime("Cooling_Grackle")), + #endif + #ifdef CHEMISTRY_GPU + &(Chemistry = OneTime("Chemistry")), + #endif + #ifdef SUPERNOVA + &(Feedback = OneTime("Feedback")), + #ifdef ANALYSIS + &(FeedbackAnalysis = OneTime("FeedbackAnalysis")), #endif - &(Total = OneTime("Total")), + #endif // SUPERNOVA + &(Total = OneTime("Total")), }; - - chprintf( "\nTiming Functions is ON \n"); - + chprintf("\nTiming Functions is ON \n"); } -void Time::Print_Times(){ - for (OneTime* x : onetimes){ +void Time::Print_Times() +{ + for (OneTime* x : onetimes) { x->PrintStep(); } } // once at end of run in main.cpp -void 
Time::Print_Average_Times( struct parameters P ){ - +void Time::Print_Average_Times(struct Parameters P) +{ chprintf("\nAverage Times n_steps:%d\n", n_steps); - for (OneTime* x : onetimes){ + for (OneTime* x : onetimes) { x->PrintAverage(); } - std::string file_name ( "run_timing.log" ); - std::string header; + std::string file_name("run_timing.log"); - chprintf( "Writing timing values to file: %s \n", file_name.c_str()); + chprintf("Writing timing values to file: %s \n", file_name.c_str()); - std::string gitHash = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); - std::string macroFlags = "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n\n"); + std::string header = "Git Commit Hash = " + std::string(GIT_HASH) + std::string("\n"); + header += "Macro Flags = " + std::string(MACRO_FLAGS) + std::string("\n"); + header += "Note that the timers all skip the first time step since it always takes longer." + std::string("\n") + + "To find the average time divide the time shown by n_steps-1" + std::string("\n"); - header = "#n_proc nx ny nz n_omp n_steps "; + header += std::string("\n") + "#n_proc nx ny nz n_omp n_steps "; - for (OneTime* x : onetimes){ + for (OneTime* x : onetimes) { header += x->name; header += " "; } @@ -142,26 +196,25 @@ void Time::Print_Average_Times( struct parameters P ){ header += " \n"; bool file_exists = false; - if (FILE *file = fopen(file_name.c_str(), "r")){ + if (FILE* file = fopen(file_name.c_str(), "r")) { file_exists = true; - chprintf( " File exists, appending values: %s \n", file_name.c_str() ); - fclose( file ); - } else{ - chprintf( " Creating File: %s \n", file_name.c_str() ); + chprintf(" File exists, appending values: %s \n", file_name.c_str()); + fclose(file); + } else { + chprintf(" Creating File: %s \n", file_name.c_str()); } #ifdef MPI_CHOLLA - if ( procID != 0 ) return; + if (procID != 0) { + return; + } #endif std::ofstream out_file; -// Output timing values + // Output timing values 
out_file.open(file_name.c_str(), std::ios::app); - if ( !file_exists ) - { - out_file << gitHash; - out_file << macroFlags; + if (!file_exists) { out_file << header; } #ifdef MPI_CHOLLA @@ -177,15 +230,40 @@ void Time::Print_Average_Times( struct parameters P ){ #endif out_file << n_steps << " "; - for (OneTime* x : onetimes){ + for (OneTime* x : onetimes) { out_file << x->t_all << " "; } out_file << "\n"; out_file.close(); - chprintf( "Saved Timing: %s \n\n", file_name.c_str() ); - + chprintf("Saved Timing: %s \n\n", file_name.c_str()); } +#endif // CPU_TIME + +ScopedTimer::ScopedTimer(const char* input_name) +{ +#ifdef CPU_TIME + name = input_name; + time_start = Get_Time(); #endif +} + +ScopedTimer::~ScopedTimer(void) +{ +#ifdef CPU_TIME + double time_elapsed_ms = (Get_Time() - time_start) * 1000; + + #ifdef MPI_CHOLLA + double t_min = ReduceRealMin(time_elapsed_ms); + double t_max = ReduceRealMax(time_elapsed_ms); + double t_avg = ReduceRealAvg(time_elapsed_ms); + #else + double t_min = time_elapsed_ms; + double t_max = time_elapsed_ms; + double t_avg = time_elapsed_ms; + #endif // MPI_CHOLLA + chprintf("ScopedTimer Min: %9.4f ms Max: %9.4f ms Avg: %9.4f ms %s \n", t_min, t_max, t_avg, name); +#endif // CPU_TIME +} diff --git a/src/utils/timing_functions.h b/src/utils/timing_functions.h index d11db642a..96cceea15 100644 --- a/src/utils/timing_functions.h +++ b/src/utils/timing_functions.h @@ -1,48 +1,51 @@ -#ifdef CPU_TIME #ifndef TIMING_FUNCTIONS_H #define TIMING_FUNCTIONS_H #include -#include "../global/global.h" -// Each instance of this class represents a single timer, timing a single section of code. -// All instances have their own n_steps, time_start, etc. so that all timers can run independently +#include "../global/global.h" // Provides Real, Get_Time + +// #ifdef CPU_TIME +// Each instance of this class represents a single timer, timing a single +// section of code. All instances have their own n_steps, time_start, etc. 
so +// that all timers can run independently class OneTime { public: const char* name; - int n_steps = 0; - Real time_start; - Real t_min; - Real t_max; - Real t_avg; - Real t_all=0; - bool inactive=true; - OneTime(void){ - } - OneTime(const char* input_name){ - name = input_name; - inactive=false; + int n_steps = 0; + Real time_start = 0; + Real t_min = 0; + Real t_max = 0; + Real t_avg = 0; + Real t_all = 0; + bool inactive = true; + OneTime(void) {} + OneTime(const char* input_name) + { + name = input_name; + inactive = false; } void Start(); void Subtract(Real time_to_subtract); - void End(); + void End(bool const print_high_values = false); void PrintStep(); void PrintAverage(); void PrintAll(); - void RecordTime( Real time ); + void RecordTime(Real time); }; -// Time loops through instances of OneTime. onetimes is initialized with pointers to each timer. +// Time loops through instances of OneTime. onetimes is initialized with +// pointers to each timer. // class Time { -public: - + public: int n_steps; OneTime Total; OneTime Calc_dt; + OneTime Hydro_Integrator; OneTime Hydro; OneTime Boundaries; OneTime Grav_Potential; @@ -52,18 +55,34 @@ class Time OneTime Part_Dens_Transf; OneTime Advance_Part_1; OneTime Advance_Part_2; - OneTime Cooling; + OneTime Cooling_GPU; + OneTime Cooling_Grackle; OneTime Chemistry; - + OneTime Feedback; + OneTime FeedbackAnalysis; + std::vector onetimes; - + Time(); void Initialize(); void Print_Times(); - void Print_Average_Times( struct parameters P ); - + void Print_Average_Times(struct Parameters P); }; +// #endif // CPU_TIME +// ScopedTimer does nothing if CPU_TIME is disabled +/* \brief ScopedTimer helps time a scope. 
Initialize as first variable and C++ guarantees it is destroyed last */
+class ScopedTimer
+{
+ public:
+  const char* name;
+  double time_start = 0;
+
+  /* \brief ScopedTimer Constructor initializes name and time */
+  ScopedTimer(const char* input_name);
+
+  /* \brief ScopedTimer Destructor computes dt and prints */
+  ~ScopedTimer(void);
+};
-#endif
-#endif //CPU_TIME
+#endif // TIMING_FUNCTIONS_H
diff --git a/tools/analyze_tidy_checks.py b/tools/analyze_tidy_checks.py
new file mode 100755
index 000000000..de5c86313
--- /dev/null
+++ b/tools/analyze_tidy_checks.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""
+================================================================================
+ This script analyzes the clang-tidy output and produces an ordered list of all
+ the checks run, how many failures a check generated and the percentage of
+ failures a check represents.
+
+ When running, make sure that you have already run clang-tidy with all the
+ checks you want enabled since this script looks for the 2 tidy_results_*.log
+ files in the root directory of Cholla
+================================================================================
+"""
+
+import numpy as np
+import pandas as pd
+import pathlib
+import re
+import subprocess
+
+
+def main():
+    """Load the tidy logs, tally the warnings per check and print a report."""
+    # Determine path to Cholla directory
+    chollaPath = pathlib.Path(__file__).resolve().parent.parent
+
+    # Load required data
+    tidyResults = loadTidyResults(chollaPath)
+    enabledChecks = getEnabledChecks(chollaPath)
+
+    # Count and sort the errors
+    sortedChecks, totalWarnings, numPassing, numFailing = countAndSort(
+        tidyResults, enabledChecks
+    )
+
+    # Print Results in markdown format
+    printResults(sortedChecks, totalWarnings, numPassing, numFailing)
+
+
+def loadTidyResults(chollaPath):
+    """Return the concatenated text of the C++ and GPU clang-tidy logs.
+
+    chollaPath is the directory (a pathlib.Path) that holds
+    tidy_results_cpp.log and tidy_results_gpu.log.
+    """
+    with open(chollaPath / "tidy_results_cpp.log", "r") as file:
+        cppData = file.read()
+    with open(chollaPath / "tidy_results_gpu.log", "r") as file:
+        gpuData = file.read()
+
+    return cppData + gpuData
+
+
+def getEnabledChecks(chollaPath):
+    """Return the check names printed by `clang-tidy --list-checks`.
+
+    clang-tidy is run with chollaPath as its working directory. An empty list
+    is returned when the output contains no "Enabled checks:" header.
+    """
+    stdout = subprocess.run(
+        ["clang-tidy", "--list-checks"], cwd=chollaPath, stdout=subprocess.PIPE
+    ).stdout.decode("utf-8")
+
+    # The check names follow the "Enabled checks:" header, one per line.
+    # Anchoring on that header rather than on the first "bugprone" entry keeps
+    # checks that are listed earlier and still works with bugprone-* disabled.
+    lines = stdout.splitlines()
+    for i, line in enumerate(lines):
+        if line.strip().startswith("Enabled checks"):
+            return [entry.strip() for entry in lines[i + 1 :] if entry.strip()]
+
+    return []
+
+
+def countAndSort(tidyResults, enabledChecks):
+    """Count how often each enabled check is named in the clang-tidy output.
+
+    Returns (sortedChecks, totalWarnings, passingChecks, failingChecks), where
+    sortedChecks is a list of (count, check) tuples, largest count first.
+    """
+    passingChecks = 0
+    failingChecks = 0
+    numWarnings = np.zeros(len(enabledChecks))
+
+    for i, check in enumerate(enabledChecks):
+        # Match complete names only. A plain substring count would also count
+        # longer names that contain this one, e.g. "readability-function-size"
+        # inside "google-readability-function-size".
+        pattern = r"(?<![\w-])" + re.escape(check) + r"(?![\w-]|\.\w)"
+        numWarnings[i] = len(re.findall(pattern, tidyResults))
+        if numWarnings[i] > 0:
+            failingChecks += 1
+        else:
+            passingChecks += 1
+
+    # Sort by warning count (ties by check name), largest first
+    sortedChecks = sorted(list(zip(numWarnings, enabledChecks)))
+    sortedChecks.reverse()
+    totalWarnings = numWarnings.sum()
+
+    return sortedChecks, totalWarnings, passingChecks, failingChecks
+
+
+def printResults(sortedChecks, totalWarnings, numPassing, numFailing):
+    """Print the summary lines and a markdown table of the failing checks."""
+    totalChecks = numPassing + numFailing
+
+    print(f"Total number of warnings: {int(totalWarnings)}")
+    if totalChecks == 0:
+        # Nothing to tabulate; also avoids dividing by zero below
+        print("No enabled checks were found")
+        return
+
+    # Determine percentages
+    print(f"{round(numPassing/totalChecks*100, 2)}% of checks passing")
+    print(f"{round(numFailing/totalChecks*100, 2)}% of checks failing")
+
+    col1Title = "Number of Warnings"
+    col2Title = "Percentage of Warnings"
+    col3Title = "Check"
+    col3Length = max([len(col3Title)] + [len(entry[1]) for entry in sortedChecks])
+
+    print()
+    print("Failing Checks:")
+    print(f"| {col1Title} | {col2Title} | {col3Title:{col3Length}} |")
+    print(f'| {"-"*len(col1Title)} | {"-"*len(col2Title)} | {"-"*col3Length} |')
+    for entry in sortedChecks:
+        if int(entry[0]) != 0:
+            print(
+                f"| {int(entry[0]):18} | {(entry[0] / totalWarnings)*100:22.2f} | {entry[1]:{col3Length}} |"
+            )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/cholla-nv-compute-sanitizer.sh b/tools/cholla-nv-compute-sanitizer.sh
new file mode 100755
index 000000000..ece87b3e0
--- /dev/null
+++ b/tools/cholla-nv-compute-sanitizer.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+
+# 
Utility script for running the NVIDIA Compute Sanitizer.
+# The Compute Sanitizer provides 4 tools:
+# - Memcheck: The memory access error and leak detection tool.
+# - Racecheck: The shared memory data access hazard detection tool.
+# - Initcheck: The uninitialized device global memory access detection tool.
+# - Synccheck: The thread synchronization hazard detection tool.
+#
+# See the NVIDIA docs for more detail:
+# https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html
+#
+# Syntax: compute-sanitizer [options] app_name [app_options]
+#
+# Compilation: Benefits from -G and -lineinfo. -Xcompiler -rdynamic for backtraces
+
+# Memcheck args
+# --leak-check full/no (default: no) full = info about memory leaks
+# --padding NUM, puts padding around arrays to improve out-of-bounds checking.
+# NUM is the size of the pad in bytes, we should probably pad at least a couple
+# of doubles, say 8 so pad=8*8=64
+#
+# initcheck args
+# --track-unused-memory yes/no (default: no) Check for unused memory allocations.
+#
+# Racecheck args
+# - --print-level info
+
+#set -x #echo all commands
+while getopts ":t:h" opt; do # leading ":" = silent mode, needed by the \? and : branches
+  case $opt in
+    t) # Set the tool to use
+      case ${OPTARG} in
+        m)
+          tool="memcheck"
+          tool_args=(--leak-check full --padding 64 --report-api-errors all)
+          ;;
+        r)
+          tool="racecheck"
+          tool_args=(--print-level info)
+          ;;
+        i)
+          tool="initcheck"
+          tool_args=(--track-unused-memory yes)
+          ;;
+        s)
+          tool="synccheck"
+          tool_args=()
+          ;;
+        *) echo "Invalid tool: ${OPTARG} (expected m, r, i or s)" >&2; exit 1 ;;
+      esac
+      ;;
+    h) # Print help
+      echo -e "
+While not required the following compile flags can help: -G for debug builds,
+-lineinfo for performance builds (can't be used with -G) and -Xcompiler -rdynamic
+is useful for backtraces in all builds.
+
+Options:
+-t m/r/i/s: Selects the tool to use.
+    m: runs the memcheck tool
+    r: runs the racecheck tool
+    i: runs the initcheck tool
+    s: runs the synccheck tool
+-h: This dialogue"
+      exit 0
+      ;;
+    \?)
+      echo "Invalid option: -${OPTARG}" >&2
+      exit 1
+      ;;
+    :)
+      echo "Option -${OPTARG} requires an argument." >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Exit if no tool was selected
+if [ -z "$tool" ]; then
+  echo 'Missing tool argument' >&2
+  exit 1
+fi
+
+# Get Paths
+cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")"
+cholla_exe=$(find "${cholla_root}" -name 'cholla.*') # pattern quoted so only find expands it
+cholla_parameter_file="${cholla_root}/examples/3D/sod.txt"
+COMPUTE_SANITIZER=$(command -v compute-sanitizer)
+sanitizer_log_file="${cholla_root}/bin/compute-sanitizer-${tool}.log"
+
+# Echo Paths
+echo -e "cholla_root = ${cholla_root}"
+echo -e "cholla_exe = ${cholla_exe}"
+echo -e "cholla_parameter_file = ${cholla_parameter_file}"
+echo -e "COMPUTE_SANITIZER = ${COMPUTE_SANITIZER}"
+echo -e "sanitizer_log_file = ${sanitizer_log_file}"
+echo -e ""
+echo -e "tool = ${tool}"
+echo -e "tool_args = ${tool_args[*]}"
+
+# Execute Sanitizer (the command is an array so paths with spaces stay single arguments)
+COMMAND=("${COMPUTE_SANITIZER}" --log-file "${sanitizer_log_file}" --tool "${tool}" "${tool_args[@]}" "${cholla_exe}" "${cholla_parameter_file}")
+echo -e "Launch Command = ${COMMAND[*]}"
+"${COMMAND[@]}"
\ No newline at end of file
diff --git a/tools/clang-format_runner.sh b/tools/clang-format_runner.sh
new file mode 100755
index 000000000..ece80ec67
--- /dev/null
+++ b/tools/clang-format_runner.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+# Description:
+# Run clang-format on all the source files in Cholla.
Any command line arguments
+# provided to this script are passed directly to clang-format
+#
+# Dependencies:
+# - clang-format v15 or greater
+# - GNU Find, the default macos version won't work
+
+# Get the location of Cholla and stop if it cannot be entered
+cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")"
+cd "$cholla_root" || exit 1
+
+# Get a list of all the files to format, one array entry per path found
+readarray -t files < <(find "${cholla_root}" -regex '.*\.\(h\|hpp\|c\|cpp\|cu\|cuh\)$' -print)
+
+clang-format -i --verbose "$@" -style="file" "${files[@]}"
\ No newline at end of file
diff --git a/tools/clang-tidy_runner.sh b/tools/clang-tidy_runner.sh
new file mode 100755
index 000000000..6f2915b8f
--- /dev/null
+++ b/tools/clang-tidy_runner.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Description:
+# Run clang-tidy on all build types in parallel. Note that this spawns 2x the
+# number of build types threads since each type has a thread for the CPU code
+# and a thread for the GPU code
+
+# If ctrl-c or a termination signal is received, stop all the background builds
+# before exiting. Trapping the signals instead of EXIT keeps the script from
+# signalling its own process group at the end of a successful run.
+stop_builds() {
+  trap - INT TERM
+  kill -- -$$ 2>/dev/null || kill $(jobs -p) 2>/dev/null
+  exit 1
+}
+trap stop_builds INT TERM
+
+# cd into the Cholla directory, i.e. the parent of the directory holding this script
+cholla_root="$(dirname "$(dirname "$(readlink -fm "$0")")")"
+cd "$cholla_root" || exit 1
+
+# Run all clang-tidy build types in parallel
+builds=( hydro gravity disk particles cosmology mhd dust cooling)
+for build in "${builds[@]}"
+do
+  make tidy TYPE="$build" &
+done
+
+# Wait for clang-tidy to finish
+wait