diff --git a/docs/guide/_sidebar.md b/docs/guide/_sidebar.md index ad6b30a5d..87fc77aa1 100644 --- a/docs/guide/_sidebar.md +++ b/docs/guide/_sidebar.md @@ -8,7 +8,6 @@ - [Introduction](/guide/occa/introduction) - [Device Streams](/guide/occa/device-streams) - [Background Device](/guide/occa/background-device) - - [Unified Memory](/guide/occa/unified-memory) - **OKL** - [Introduction](/guide/okl/introduction) diff --git a/docs/guide/occa/unified-memory.md b/docs/guide/occa/unified-memory.md deleted file mode 100644 index 8f12b4adf..000000000 --- a/docs/guide/occa/unified-memory.md +++ /dev/null @@ -1,51 +0,0 @@ -# Unified Memory - -Unified memory is another feature that can facilitate adding OCCA in existing codes. -Rather than working with `occa::memory` objects, we allow for the use of raw pointers instead. - -::: tabs language - -- C++ - - ```cpp - int *a = (int*) occa::umalloc(10 * sizeof(int)); - ``` - -- C - - ```c - int *a = (int*) occaUmalloc(10 * sizeof(int)); - ``` - -::: - -The resulting pointer indirectly maps to memory in the device. -We can edit the host pointer and use it directly in `occa::kernels` - -::: tabs language - -- C++ - - ```cpp - for (int i = 0; i < 10; ++i) { - a[i] = i; - } - addVectors(10, a, o_b, o_ab); - ``` - -- C - - ```c - for (int i = 0; i < 10; ++i) { - a[i] = i; - } - occaKernelRun(addVectors, - occaInt(entries), - a, o_b, o_ab); - ``` - -::: - -# Syncing Data - -!> TODO: Missing Section diff --git a/examples/c/01_add_vectors/main.c b/examples/c/01_add_vectors/main.c index 6186b1c5a..0cf95a919 100644 --- a/examples/c/01_add_vectors/main.c +++ b/examples/c/01_add_vectors/main.c @@ -69,12 +69,13 @@ int main(int argc, const char **argv) { //======================================================== // Allocate memory on the device - o_a = occaDeviceTypedMalloc(device, entries, occaDtypeFloat, NULL, occaDefault); - o_b = occaDeviceTypedMalloc(device, entries, occaDtypeFloat, NULL, occaDefault); + o_a = occaDeviceTypedMalloc(device, entries, occaDtypeFloat, a, occaDefault); + o_b = occaDeviceTypedMalloc(device, entries, occaDtypeFloat, b, occaDefault); - // We can also allocate memory without a dtype + // We can also allocate memory without a dtype and manually copy the data over // WARNING: This will disable runtime type checking o_ab = occaDeviceMalloc(device, entries * sizeof(float), NULL, occaDefault); + occaCopyPtrToMem(o_ab, ab, occaAllBytes, 0, occaDefault); // Setup properties that can be passed to the kernel occaJson props = occaCreateJson(); @@ -86,10 +87,6 @@ int main(int argc, const char **argv) { "addVectors", props); - // Copy memory to the device - occaCopyPtrToMem(o_a, a, entries*sizeof(float), 0, occaDefault); - occaCopyPtrToMem(o_b, b, occaAllBytes , 0, occaDefault); - // Launch device kernel occaKernelRun(addVectors, occaInt(entries), o_a, o_b, o_ab); @@ -132,7 +129,7 @@ occaJson parseArgs(int argc, const char **argv) { " shortname: 'd'," " description: 'Device properties (default: \"{mode: \\'Serial\\'}\")'," " with_arg: true," - " default_value: { mode: 'Serial' }," + " default_value: \"{ mode: 'Serial' }\"," " }," " {" " name: 'verbose'," diff --git a/examples/c/02_background_device/main.c b/examples/c/02_background_device/main.c index 28ff4f951..f29c96497 100644 --- a/examples/c/02_background_device/main.c +++ b/examples/c/02_background_device/main.c @@ -37,9 +37,9 @@ int main(int argc, const char **argv) { int entries = 5; int i; - float *a = (float*) occaTypedUMalloc(entries, occaDtypeFloat, NULL, occaDefault); - float *b = (float*) occaTypedUMalloc(entries, occaDtypeFloat, NULL, occaDefault); - float *ab = (float*) occaTypedUMalloc(entries, occaDtypeFloat, NULL, occaDefault); + float *a = (float*) malloc(entries*sizeof(float)); + float *b = (float*) malloc(entries*sizeof(float)); + float *ab = (float*) malloc(entries*sizeof(float)); for (i = 0; i < entries; ++i) { a[i] = i; @@ -47,23 +47,20 @@ int main(int argc, const char **argv) { ab[i] = 0; } + // Allocate memory on the background device + occaMemory o_a = occaTypedMalloc(entries, occaDtypeFloat, a, occaDefault); + occaMemory o_b = occaTypedMalloc(entries, occaDtypeFloat, b, occaDefault); + occaMemory o_ab = occaTypedMalloc(entries, occaDtypeFloat, ab, occaDefault); + occaKernel addVectors = occaBuildKernel("addVectors.okl", "addVectors", occaDefault); - // Arrays a, b, and ab are now resident - // on [device] occaKernelRun(addVectors, - occaInt(entries), occaPtr(a), occaPtr(b), occaPtr(ab)); - - // b is not const in the kernel, so we can use - // dontSync(b) to manually force b to not sync - occaDontSync(b); + occaInt(entries), o_a, o_b, o_ab); - // Finish work queued up in [device], - // synchronizing a, b, and ab and - // making it safe to use them again - occaFinish(); + // Copy result to the host + occaCopyMemToPtr(ab, o_ab, occaAllBytes, 0, occaDefault); for (i = 0; i < entries; ++i) { printf("%d = %f\n", i, ab[i]); @@ -74,11 +71,17 @@ int main(int argc, const char **argv) { } } + // Free host memory + free(a); + free(b); + free(ab); + + // Free device memory and occa objects occaFree(&args); occaFree(&addVectors); - occaFreeUvaPtr(a); - occaFreeUvaPtr(b); - occaFreeUvaPtr(ab); + occaFree(&o_a); + occaFree(&o_b); + occaFree(&o_ab); return 0; } @@ -94,7 +97,7 @@ occaJson parseArgs(int argc, const char **argv) { " shortname: 'd'," " description: 'Device properties (default: \"{ mode: \\'Serial\\' }\")'," " with_arg: true," - " default_value: { mode: 'Serial' }," + " default_value: \"{ mode: 'Serial' }\"," " }," " {" " name: 'verbose'," diff --git a/examples/c/03_generic_inline_kernel/main.c b/examples/c/03_generic_inline_kernel/main.c index 9d14bd853..188d9ea81 100644 --- a/examples/c/03_generic_inline_kernel/main.c +++ b/examples/c/03_generic_inline_kernel/main.c @@ -27,9 +27,9 @@ int main(int argc, const char **argv) { int entries = 5; int i; - float *a = (float*) occaTypedUMalloc(entries, occaDtypeFloat, NULL, occaDefault); - float *b = (float*) occaTypedUMalloc(entries, occaDtypeFloat, NULL, occaDefault); - float *ab = (float*) occaTypedUMalloc(entries, occaDtypeFloat, NULL, occaDefault); + float *a = (float*) malloc(entries*sizeof(float)); + float *b = (float*) malloc(entries*sizeof(float)); + float *ab = (float*) malloc(entries*sizeof(float)); for (i = 0; i < entries; ++i) { a[i] = i; @@ -37,6 +37,11 @@ int main(int argc, const char **argv) { ab[i] = 0; } + // Allocate memory on the background device + occaMemory o_a = occaTypedMalloc(entries, occaDtypeFloat, a, occaDefault); + occaMemory o_b = occaTypedMalloc(entries, occaDtypeFloat, b, occaDefault); + occaMemory o_ab = occaTypedMalloc(entries, occaDtypeFloat, ab, occaDefault); + occaJson props = occaCreateJson(); occaJsonObjectSet(props, "defines/TILE_SIZE", @@ -46,10 +51,10 @@ int main(int argc, const char **argv) { // Build the variable scope used inside the inlined OKL code occaScopeAddConst(scope, "entries", occaInt(entries)); - occaScopeAddConst(scope, "a", occaPtr(a)); - occaScopeAddConst(scope, "b", occaPtr(b)); + occaScopeAddConst(scope, "a", o_a); + occaScopeAddConst(scope, "b", o_b); // We can name our scoped variales anything - occaScopeAdd(scope, "output", occaPtr(ab)); + occaScopeAdd(scope, "output", o_ab); // We can also add unused variables to the scope which could be // useful while debugging occaScopeAdd(scope, "debugValue", occaInt(42)); @@ -66,7 +71,8 @@ int main(int argc, const char **argv) { ) ); - occaFinish(); + // Copy result to the host + occaCopyMemToPtr(ab, o_ab, occaAllBytes, 0, occaDefault); for (i = 0; i < entries; ++i) { printf("%d = %f\n", i, ab[i]); @@ -77,12 +83,18 @@ int main(int argc, const char **argv) { } } + // Free host memory + free(a); + free(b); + free(ab); + + // Free device memory and occa objects occaFree(&args); occaFree(&props); occaFree(&scope); - occaFreeUvaPtr(a); - occaFreeUvaPtr(b); - occaFreeUvaPtr(ab); + occaFree(&o_a); + occaFree(&o_b); + occaFree(&o_ab); return 0; } @@ -98,7 +110,7 @@ occaJson parseArgs(int argc, const char **argv) { " shortname: 'd'," " description: 'Device properties (default: \"{ mode: \\'Serial\\' }\")'," " with_arg: true," - " default_value: { mode: 'Serial' }," + " default_value: \"{ mode: 'Serial' }\"," " }," " {" " name: 'verbose'," diff --git a/examples/c/04_reduction/main.c b/examples/c/04_reduction/main.c index 4382946ad..11fdcc3c1 100644 --- a/examples/c/04_reduction/main.c +++ b/examples/c/04_reduction/main.c @@ -114,7 +114,7 @@ occaJson parseArgs(int argc, const char **argv) { " shortname: 'd'," " description: 'Device properties (default: \"{ mode: \\'Serial\\' }\")'," " with_arg: true," - " default_value: { mode: 'Serial' }," + " default_value: \"{ mode: 'Serial' }\"," " }," " {" " name: 'verbose'," diff --git a/examples/cpp/04_generic_inline_kernel/main.cpp b/examples/cpp/04_generic_inline_kernel/main.cpp index f38309818..fab61802e 100644 --- a/examples/cpp/04_generic_inline_kernel/main.cpp +++ b/examples/cpp/04_generic_inline_kernel/main.cpp @@ -19,9 +19,9 @@ int main(int argc, const char **argv) { int entries = 5; - float *a = occa::umalloc(entries); - float *b = occa::umalloc(entries); - float *ab = occa::umalloc(entries); + float *a = new float[entries]; + float *b = new float[entries]; + float *ab = new float[entries]; for (int i = 0; i < entries; ++i) { a[i] = i; @@ -29,12 +29,21 @@ int main(int argc, const char **argv) { ab[i] = 0; } + // Uses the background device + occa::array array_a(entries); + occa::array array_b(entries); + occa::array array_ab(entries); + + // Copy over host data + array_a.copyFrom(a); + array_b.copyFrom(b); + array_ab.fill(0); + occa::scope scope({ {"entries", entries}, - // Const-ness of variables is passed through, which can be useful for the compiler - {"a", (const float*) a}, - {"b", (const float*) b}, - {"ab", ab} + {"a", array_a}, + {"b", array_b}, + {"ab", array_ab} }, { // Define TILE_SIZE at compile-time {"defines/TILE_SIZE", 16} @@ -46,7 +55,7 @@ int main(int argc, const char **argv) { } )); - occa::finish(); + array_ab.copyTo(ab); for (int i = 0; i < entries; ++i) { std::cout << i << ": " << ab[i] << '\n'; @@ -57,9 +66,10 @@ int main(int argc, const char **argv) { } } - occa::free(a); - occa::free(b); - occa::free(ab); + // Free host memory + delete [] a; + delete [] b; + delete [] ab; return 0; } diff --git a/examples/cpp/07_memory_management/.gitignore b/examples/cpp/07_memory_management/.gitignore deleted file mode 100644 index 353881e35..000000000 --- a/examples/cpp/07_memory_management/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -main -main.o -main.dSYM - -main_c -main_c.dSYM - -fparser.log -main_f90 \ No newline at end of file diff --git a/examples/cpp/07_memory_management/CMakeLists.txt b/examples/cpp/07_memory_management/CMakeLists.txt deleted file mode 100644 index 4f81d98eb..000000000 --- a/examples/cpp/07_memory_management/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -compile_cpp_example_with_modes(memory_management main.cpp) - -add_custom_target(cpp_example_memory_management_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/addVectors.okl addVectors.okl) -add_dependencies(examples_cpp_memory_management cpp_example_memory_management_okl) diff --git a/examples/cpp/07_memory_management/README.md b/examples/cpp/07_memory_management/README.md deleted file mode 100644 index b4156c8a4..000000000 --- a/examples/cpp/07_memory_management/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# Example: Unified Memory - -We show unified memory which automatically syncs data between host and device - -Transfers are dome between kernel launches and device synchronization (`occa::device::finish()`) when needed - -# Compiling the Example - -```bash -make -``` - -## Usage - -``` -> ./main --help - -Usage: ./main [OPTIONS] - -Example using unified memory, where host and device data is mirrored and synced - -Options: - -d, --device Device properties (default: "{mode: 'Serial'}") - -h, --help Print usage - -v, --verbose Compile kernels in verbose mode -``` diff --git a/examples/cpp/07_memory_management/main.cpp b/examples/cpp/07_memory_management/main.cpp deleted file mode 100644 index 1011ba2c8..000000000 --- a/examples/cpp/07_memory_management/main.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include - -#include - -//---[ Internal Tools ]----------------- -// Note: These headers are not officially supported -// Please don't rely on it outside of the occa examples -#include -#include -//====================================== - -occa::json parseArgs(int argc, const char **argv); - -int main(int argc, const char **argv) { - occa::json args = parseArgs(argc, argv); - - occa::setDevice((std::string) args["options/device"]); - - int entries = 5; - - // umalloc: [U]nified [M]emory [Alloc]ation - // Allocate host memory that auto-syncs with the device - // between before kernel calls and device::finish() - // if needed. - float *a = occa::umalloc(entries); - float *b = occa::umalloc(entries); - float *ab = occa::umalloc(entries); - - for (int i = 0; i < entries; ++i) { - a[i] = i; - b[i] = 1 - i; - ab[i] = 0; - } - - occa::kernel addVectors = occa::buildKernel("addVectors.okl", - "addVectors"); - - // Arrays a, b, and ab are now resident - // on [device] - addVectors(entries, a, b, ab); - - // b is not const in the kernel, so we can use - // dontSync(b) to manually force b to not sync - occa::dontSync(b); - - // Finish work queued up in [device], - // synchronizing a, b, and ab and - // making it safe to use them again - occa::finish(); - - for (int i = 0; i < entries; ++i) { - std::cout << i << ": " << ab[i] << '\n'; - } - for (int i = 0; i < entries; ++i) { - if (!occa::areBitwiseEqual(ab[i], a[i] + b[i])) { - throw 1; - } - } - - // Memory is automatically freed - - return 0; -} -occa::json parseArgs(int argc, const char **argv) { - occa::cli::parser parser; - parser - .withDescription( - "Example using unified memory, where host and device data is mirrored and synced" - ) - .addOption( - occa::cli::option('d', "device", - "Device properties (default: \"{mode: 'Serial'}\")") - .withArg() - .withDefaultValue("{mode: 'CUDA', device_id: 0}") - ) - .addOption( - occa::cli::option('v', "verbose", - "Compile kernels in verbose mode") - ); - - occa::json args = parser.parseArgs(argc, argv); - occa::settings()["kernel/verbose"] = args["options/verbose"]; - - return args; -} diff --git a/examples/cpp/08_streams/.gitignore b/examples/cpp/07_streams/.gitignore similarity index 100% rename from examples/cpp/08_streams/.gitignore rename to examples/cpp/07_streams/.gitignore diff --git a/examples/cpp/08_streams/CMakeLists.txt b/examples/cpp/07_streams/CMakeLists.txt similarity index 100% rename from examples/cpp/08_streams/CMakeLists.txt rename to examples/cpp/07_streams/CMakeLists.txt diff --git a/examples/cpp/07_memory_management/Makefile b/examples/cpp/07_streams/Makefile similarity index 100% rename from examples/cpp/07_memory_management/Makefile rename to examples/cpp/07_streams/Makefile diff --git a/examples/cpp/08_streams/README.md b/examples/cpp/07_streams/README.md similarity index 100% rename from examples/cpp/08_streams/README.md rename to examples/cpp/07_streams/README.md diff --git a/examples/cpp/07_memory_management/addVectors.okl b/examples/cpp/07_streams/addVectors.okl similarity index 100% rename from examples/cpp/07_memory_management/addVectors.okl rename to examples/cpp/07_streams/addVectors.okl diff --git a/examples/cpp/08_streams/main.cpp b/examples/cpp/07_streams/main.cpp similarity index 100% rename from examples/cpp/08_streams/main.cpp rename to examples/cpp/07_streams/main.cpp diff --git a/examples/cpp/09_mpi/.gitignore b/examples/cpp/08_mpi/.gitignore similarity index 100% rename from examples/cpp/09_mpi/.gitignore rename to examples/cpp/08_mpi/.gitignore diff --git a/examples/cpp/09_mpi/CMakeLists.txt b/examples/cpp/08_mpi/CMakeLists.txt similarity index 100% rename from examples/cpp/09_mpi/CMakeLists.txt rename to examples/cpp/08_mpi/CMakeLists.txt diff --git a/examples/cpp/08_streams/Makefile b/examples/cpp/08_mpi/Makefile similarity index 100% rename from examples/cpp/08_streams/Makefile rename to examples/cpp/08_mpi/Makefile diff --git a/examples/cpp/09_mpi/README.md b/examples/cpp/08_mpi/README.md similarity index 100% rename from examples/cpp/09_mpi/README.md rename to examples/cpp/08_mpi/README.md diff --git a/examples/cpp/08_streams/addVectors.okl b/examples/cpp/08_mpi/addVectors.okl similarity index 100% rename from examples/cpp/08_streams/addVectors.okl rename to examples/cpp/08_mpi/addVectors.okl diff --git a/examples/cpp/09_mpi/main.cpp b/examples/cpp/08_mpi/main.cpp similarity index 100% rename from examples/cpp/09_mpi/main.cpp rename to examples/cpp/08_mpi/main.cpp diff --git a/examples/cpp/09_mpi/Makefile b/examples/cpp/09_mpi/Makefile deleted file mode 100644 index bf13a8b42..000000000 --- a/examples/cpp/09_mpi/Makefile +++ /dev/null @@ -1,30 +0,0 @@ - -PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef OCCA_DIR - include $(PROJ_DIR)/../../../scripts/build/Makefile -else - include ${OCCA_DIR}/scripts/build/Makefile -endif - -#---[ COMPILATION ]------------------------------- -headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp) -sources = $(wildcard $(srcPath)/*.cpp) - -objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o)) - -executables: ${PROJ_DIR}/main - -${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp - $(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags) - @if which install_name_tool > /dev/null 2>&1; then \ - install_name_tool -add_rpath "${OCCA_DIR}/lib" ${PROJ_DIR}/main; \ - fi - -$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp))) - $(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $< - -clean: - rm -f $(objPath)/*; - rm -f ${PROJ_DIR}/main; -#================================================= diff --git a/examples/cpp/10_native_cpp_kernels/CMakeLists.txt b/examples/cpp/09_native_cpp_kernels/CMakeLists.txt similarity index 100% rename from examples/cpp/10_native_cpp_kernels/CMakeLists.txt rename to examples/cpp/09_native_cpp_kernels/CMakeLists.txt diff --git a/examples/cpp/10_native_cpp_kernels/Makefile b/examples/cpp/09_native_cpp_kernels/Makefile similarity index 100% rename from examples/cpp/10_native_cpp_kernels/Makefile rename to examples/cpp/09_native_cpp_kernels/Makefile diff --git a/examples/cpp/10_native_cpp_kernels/addVectors.cpp b/examples/cpp/09_native_cpp_kernels/addVectors.cpp similarity index 100% rename from examples/cpp/10_native_cpp_kernels/addVectors.cpp rename to examples/cpp/09_native_cpp_kernels/addVectors.cpp diff --git a/examples/cpp/10_native_cpp_kernels/main.cpp b/examples/cpp/09_native_cpp_kernels/main.cpp similarity index 100% rename from examples/cpp/10_native_cpp_kernels/main.cpp rename to examples/cpp/09_native_cpp_kernels/main.cpp diff --git a/examples/cpp/11_native_c_kernels/CMakeLists.txt b/examples/cpp/10_native_c_kernels/CMakeLists.txt similarity index 100% rename from examples/cpp/11_native_c_kernels/CMakeLists.txt rename to examples/cpp/10_native_c_kernels/CMakeLists.txt diff --git a/examples/cpp/11_native_c_kernels/Makefile b/examples/cpp/10_native_c_kernels/Makefile similarity index 100% rename from examples/cpp/11_native_c_kernels/Makefile rename to examples/cpp/10_native_c_kernels/Makefile diff --git a/examples/cpp/11_native_c_kernels/addVectors.c b/examples/cpp/10_native_c_kernels/addVectors.c similarity index 100% rename from examples/cpp/11_native_c_kernels/addVectors.c rename to examples/cpp/10_native_c_kernels/addVectors.c diff --git a/examples/cpp/11_native_c_kernels/main.cpp b/examples/cpp/10_native_c_kernels/main.cpp similarity index 100% rename from examples/cpp/11_native_c_kernels/main.cpp rename to examples/cpp/10_native_c_kernels/main.cpp diff --git a/examples/cpp/12_native_cuda_kernels/CMakeLists.txt b/examples/cpp/11_native_cuda_kernels/CMakeLists.txt similarity index 100% rename from examples/cpp/12_native_cuda_kernels/CMakeLists.txt rename to examples/cpp/11_native_cuda_kernels/CMakeLists.txt diff --git a/examples/cpp/12_native_cuda_kernels/Makefile b/examples/cpp/11_native_cuda_kernels/Makefile similarity index 100% rename from examples/cpp/12_native_cuda_kernels/Makefile rename to examples/cpp/11_native_cuda_kernels/Makefile diff --git a/examples/cpp/12_native_cuda_kernels/addVectors.cu b/examples/cpp/11_native_cuda_kernels/addVectors.cu similarity index 100% rename from examples/cpp/12_native_cuda_kernels/addVectors.cu rename to examples/cpp/11_native_cuda_kernels/addVectors.cu diff --git a/examples/cpp/12_native_cuda_kernels/main.cpp b/examples/cpp/11_native_cuda_kernels/main.cpp similarity index 100% rename from examples/cpp/12_native_cuda_kernels/main.cpp rename to examples/cpp/11_native_cuda_kernels/main.cpp diff --git a/examples/cpp/13_native_opencl_kernels/CMakeLists.txt b/examples/cpp/12_native_opencl_kernels/CMakeLists.txt similarity index 100% rename from examples/cpp/13_native_opencl_kernels/CMakeLists.txt rename to examples/cpp/12_native_opencl_kernels/CMakeLists.txt diff --git a/examples/cpp/13_native_opencl_kernels/Makefile b/examples/cpp/12_native_opencl_kernels/Makefile similarity index 100% rename from examples/cpp/13_native_opencl_kernels/Makefile rename to examples/cpp/12_native_opencl_kernels/Makefile diff --git a/examples/cpp/13_native_opencl_kernels/addVectors.cl b/examples/cpp/12_native_opencl_kernels/addVectors.cl similarity index 100% rename from examples/cpp/13_native_opencl_kernels/addVectors.cl rename to examples/cpp/12_native_opencl_kernels/addVectors.cl diff --git a/examples/cpp/13_native_opencl_kernels/main.cpp b/examples/cpp/12_native_opencl_kernels/main.cpp similarity index 100% rename from examples/cpp/13_native_opencl_kernels/main.cpp rename to examples/cpp/12_native_opencl_kernels/main.cpp diff --git a/examples/cpp/14_openmp_interop/.gitignore b/examples/cpp/13_openmp_interop/.gitignore similarity index 100% rename from examples/cpp/14_openmp_interop/.gitignore rename to examples/cpp/13_openmp_interop/.gitignore diff --git a/examples/cpp/14_openmp_interop/CMakeLists.txt b/examples/cpp/13_openmp_interop/CMakeLists.txt similarity index 100% rename from examples/cpp/14_openmp_interop/CMakeLists.txt rename to examples/cpp/13_openmp_interop/CMakeLists.txt diff --git a/examples/cpp/14_openmp_interop/Makefile b/examples/cpp/13_openmp_interop/Makefile similarity index 100% rename from examples/cpp/14_openmp_interop/Makefile rename to examples/cpp/13_openmp_interop/Makefile diff --git a/examples/cpp/09_mpi/addVectors.okl b/examples/cpp/13_openmp_interop/addVectors.okl similarity index 100% rename from examples/cpp/09_mpi/addVectors.okl rename to examples/cpp/13_openmp_interop/addVectors.okl diff --git a/examples/cpp/14_openmp_interop/main.cpp b/examples/cpp/13_openmp_interop/main.cpp similarity index 100% rename from examples/cpp/14_openmp_interop/main.cpp rename to examples/cpp/13_openmp_interop/main.cpp diff --git a/examples/cpp/15_cuda_interop/.gitignore b/examples/cpp/14_cuda_interop/.gitignore similarity index 100% rename from examples/cpp/15_cuda_interop/.gitignore rename to examples/cpp/14_cuda_interop/.gitignore diff --git a/examples/cpp/15_cuda_interop/CMakeLists.txt b/examples/cpp/14_cuda_interop/CMakeLists.txt similarity index 100% rename from examples/cpp/15_cuda_interop/CMakeLists.txt rename to examples/cpp/14_cuda_interop/CMakeLists.txt diff --git a/examples/cpp/15_cuda_interop/Makefile b/examples/cpp/14_cuda_interop/Makefile similarity index 100% rename from examples/cpp/15_cuda_interop/Makefile rename to examples/cpp/14_cuda_interop/Makefile diff --git a/examples/cpp/14_openmp_interop/addVectors.okl b/examples/cpp/14_cuda_interop/addVectors.okl similarity index 100% rename from examples/cpp/14_openmp_interop/addVectors.okl rename to examples/cpp/14_cuda_interop/addVectors.okl diff --git a/examples/cpp/15_cuda_interop/main.cpp b/examples/cpp/14_cuda_interop/main.cpp similarity index 100% rename from examples/cpp/15_cuda_interop/main.cpp rename to examples/cpp/14_cuda_interop/main.cpp diff --git a/examples/cpp/15_cuda_interop/addVectors.okl b/examples/cpp/15_cuda_interop/addVectors.okl deleted file mode 100644 index ba0dad448..000000000 --- a/examples/cpp/15_cuda_interop/addVectors.okl +++ /dev/null @@ -1,8 +0,0 @@ -@kernel void addVectors(const int entries, - const float *a, - const float *b, - float *ab) { - for (int i = 0; i < entries; ++i; @tile(16, @outer, @inner)) { - ab[i] = a[i] + b[i]; - } -} diff --git a/examples/cpp/16_finite_difference/.gitignore b/examples/cpp/15_finite_difference/.gitignore similarity index 100% rename from examples/cpp/16_finite_difference/.gitignore rename to examples/cpp/15_finite_difference/.gitignore diff --git a/examples/cpp/16_finite_difference/.occa/.gitignore b/examples/cpp/15_finite_difference/.occa/.gitignore similarity index 100% rename from examples/cpp/16_finite_difference/.occa/.gitignore rename to examples/cpp/15_finite_difference/.occa/.gitignore diff --git a/examples/cpp/16_finite_difference/Makefile b/examples/cpp/15_finite_difference/Makefile similarity index 100% rename from examples/cpp/16_finite_difference/Makefile rename to examples/cpp/15_finite_difference/Makefile diff --git a/examples/cpp/16_finite_difference/README.md b/examples/cpp/15_finite_difference/README.md similarity index 100% rename from examples/cpp/16_finite_difference/README.md rename to examples/cpp/15_finite_difference/README.md diff --git a/examples/cpp/16_finite_difference/fd2d.okl b/examples/cpp/15_finite_difference/fd2d.okl similarity index 100% rename from examples/cpp/16_finite_difference/fd2d.okl rename to examples/cpp/15_finite_difference/fd2d.okl diff --git a/examples/cpp/16_finite_difference/include/visualizer.hpp b/examples/cpp/15_finite_difference/include/visualizer.hpp similarity index 100% rename from examples/cpp/16_finite_difference/include/visualizer.hpp rename to examples/cpp/15_finite_difference/include/visualizer.hpp diff --git a/examples/cpp/16_finite_difference/main.cpp b/examples/cpp/15_finite_difference/main.cpp similarity index 100% rename from examples/cpp/16_finite_difference/main.cpp rename to examples/cpp/15_finite_difference/main.cpp diff --git a/examples/cpp/16_finite_difference/settings.json b/examples/cpp/15_finite_difference/settings.json similarity index 100% rename from examples/cpp/16_finite_difference/settings.json rename to examples/cpp/15_finite_difference/settings.json diff --git a/examples/cpp/16_finite_difference/src/visualizer.cpp b/examples/cpp/15_finite_difference/src/visualizer.cpp similarity index 100% rename from examples/cpp/16_finite_difference/src/visualizer.cpp rename to examples/cpp/15_finite_difference/src/visualizer.cpp diff --git a/examples/cpp/17_mandelbulb/.gitignore b/examples/cpp/16_mandelbulb/.gitignore similarity index 100% rename from examples/cpp/17_mandelbulb/.gitignore rename to examples/cpp/16_mandelbulb/.gitignore diff --git a/examples/cpp/17_mandelbulb/Makefile b/examples/cpp/16_mandelbulb/Makefile similarity index 100% rename from examples/cpp/17_mandelbulb/Makefile rename to examples/cpp/16_mandelbulb/Makefile diff --git a/examples/cpp/17_mandelbulb/README.md b/examples/cpp/16_mandelbulb/README.md similarity index 100% rename from examples/cpp/17_mandelbulb/README.md rename to examples/cpp/16_mandelbulb/README.md diff --git a/examples/cpp/17_mandelbulb/include/visualizer.hpp b/examples/cpp/16_mandelbulb/include/visualizer.hpp similarity index 100% rename from examples/cpp/17_mandelbulb/include/visualizer.hpp rename to examples/cpp/16_mandelbulb/include/visualizer.hpp diff --git a/examples/cpp/17_mandelbulb/main.cpp b/examples/cpp/16_mandelbulb/main.cpp similarity index 100% rename from examples/cpp/17_mandelbulb/main.cpp rename to examples/cpp/16_mandelbulb/main.cpp diff --git a/examples/cpp/17_mandelbulb/obj/.gitignore b/examples/cpp/16_mandelbulb/obj/.gitignore similarity index 100% rename from examples/cpp/17_mandelbulb/obj/.gitignore rename to examples/cpp/16_mandelbulb/obj/.gitignore diff --git a/examples/cpp/17_mandelbulb/rayMarcher.okl b/examples/cpp/16_mandelbulb/rayMarcher.okl similarity index 100% rename from examples/cpp/17_mandelbulb/rayMarcher.okl rename to examples/cpp/16_mandelbulb/rayMarcher.okl diff --git a/examples/cpp/17_mandelbulb/settings.json b/examples/cpp/16_mandelbulb/settings.json similarity index 100% rename from examples/cpp/17_mandelbulb/settings.json rename to examples/cpp/16_mandelbulb/settings.json diff --git a/examples/cpp/17_mandelbulb/src/visualizer.cpp b/examples/cpp/16_mandelbulb/src/visualizer.cpp similarity index 100% rename from examples/cpp/17_mandelbulb/src/visualizer.cpp rename to examples/cpp/16_mandelbulb/src/visualizer.cpp diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index b09adbb15..3982216db 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -4,16 +4,15 @@ add_subdirectory(03_arrays) add_subdirectory(04_generic_inline_kernel) add_subdirectory(05_custom_types) add_subdirectory(06_shared_memory) -add_subdirectory(07_memory_management) -add_subdirectory(08_streams) -add_subdirectory(09_mpi) -add_subdirectory(10_native_cpp_kernels) -add_subdirectory(11_native_c_kernels) -add_subdirectory(12_native_cuda_kernels) -add_subdirectory(13_native_opencl_kernels) -add_subdirectory(14_openmp_interop) -add_subdirectory(15_cuda_interop) +add_subdirectory(07_streams) +add_subdirectory(08_mpi) +add_subdirectory(09_native_cpp_kernels) +add_subdirectory(10_native_c_kernels) +add_subdirectory(11_native_cuda_kernels) +add_subdirectory(12_native_opencl_kernels) +add_subdirectory(13_openmp_interop) +add_subdirectory(14_cuda_interop) # Don't force-compile OpenGL examples -# add_subdirectory(16_finite_difference) -# add_subdirectory(17_mandelbulb) +# add_subdirectory(15_finite_difference) +# add_subdirectory(16_mandelbulb) diff --git a/examples/fortran/02_background_device/CMakeLists.txt b/examples/fortran/02_background_device/CMakeLists.txt deleted file mode 100644 index b16417559..000000000 --- a/examples/fortran/02_background_device/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -compile_fortran_example_with_modes(02_background_device main.f90) - -add_custom_target(fortran_example_02_addVectors_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/addVectors.okl addVectors.okl) -add_dependencies(examples_fortran_02_background_device fortran_example_02_addVectors_okl) diff --git a/examples/fortran/02_background_device/Makefile b/examples/fortran/02_background_device/Makefile deleted file mode 100644 index b85e8207b..000000000 --- a/examples/fortran/02_background_device/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef OCCA_DIR - OCCA_DIR=$(PROJ_DIR)/../../.. -endif -include $(OCCA_DIR)/scripts/build/Makefile - - -#---[ COMPILATION ]------------------------------- -all: main - -%: %.f90 - @mkdir -p $(abspath $(dir $@)) - $(fCompiler) $(fCompilerFlags) -o $@ $^ $(flags) $(fPaths) $(fLinkerFlags) - -clean: - rm -rf $(PROJ_DIR)/main -#================================================= - - -#---[ RUN ]--------------------------------------- -run: main - $(PROJ_DIR)/main --verbose -#================================================= diff --git a/examples/fortran/02_background_device/addVectors.okl b/examples/fortran/02_background_device/addVectors.okl deleted file mode 100644 index ba0dad448..000000000 --- a/examples/fortran/02_background_device/addVectors.okl +++ /dev/null @@ -1,8 +0,0 @@ -@kernel void addVectors(const int entries, - const float *a, - const float *b, - float *ab) { - for (int i = 0; i < entries; ++i; @tile(16, @outer, @inner)) { - ab[i] = a[i] + b[i]; - } -} diff --git a/examples/fortran/02_background_device/main.f90 b/examples/fortran/02_background_device/main.f90 deleted file mode 100644 index b917fe4ab..000000000 --- a/examples/fortran/02_background_device/main.f90 +++ /dev/null @@ -1,122 +0,0 @@ -program main - use, intrinsic :: iso_c_binding, & - C_void_ptr => C_ptr - use occa - - implicit none - - integer :: i - integer(occaUDim_t) :: iu - integer(occaUDim_t) :: entries = 5 - character(len=1024) :: arg, info - - ! OCCA device, kernel, memory and property objects - type(occaKernel) :: addVectors - type(C_void_ptr) :: a, b, ab - real(C_float), pointer :: a_ptr(:), b_ptr(:), ab_ptr(:) - - ! Set default OCCA device info - info = "{mode: 'Serial'}" - !info = "{mode: 'OpenMP', schedule: 'compact', chunk: 10}" - !info = "{mode: 'CUDA' , device_id: 0}" - !info = "{mode: 'OpenCL', platform_id: 0, device_id: 0}" - - ! Parse command arguments - i = 1 - do while (i .le. command_argument_count()) - call get_command_argument(i, arg) - - select case (arg) - case ("-v", "--verbose") - call occaJsonObjectSet(occaSettings(), "kernel/verbose", occaTrue) - case ("-d", "--device") - i = i+1 - call get_command_argument(i, info) - case ("-h", "--help") - call print_help() - stop - case default - write(*,'(2a, /)') "Unrecognised command-line option: ", arg - stop - end select - i = i+1 - end do - - ! Print device infos - call occaPrintModeInfo() - - ! Create OCCA device - call occaSetDeviceFromString(F_C_str(info)) - - ! umalloc: [U]nified [M]emory [Alloc]ation - ! Allocate host memory that auto-syncs with the device between before kernel - ! calls and occaFinish() if needed. - a = occaTypedUMalloc(entries, occaDtypeFloat, C_NULL_ptr, occaDefault) - b = occaTypedUMalloc(entries, occaDtypeFloat, C_NULL_ptr, occaDefault) - ab = occaTypedUMalloc(entries, occaDtypeFloat, C_NULL_ptr, occaDefault) - - ! Assign Fortran pointers to the (host) memory - if (C_associated(a)) then - call C_F_pointer(a,a_ptr,[entries]) - else - a_ptr => null() - end if - if (C_associated(b)) then - call C_F_pointer(b,b_ptr,[entries]) - else - b_ptr => null() - end if - if (C_associated(ab)) then - call C_F_pointer(ab,ab_ptr,[entries]) - else - ab_ptr => null() - end if - - ! Initialise host arrays - do iu=1,entries - a_ptr(iu) = real(iu)-1 - b_ptr(iu) = 2-real(iu) - end do - ab_ptr = 0 - - ! Compile the kernel at run-time - addVectors = occaBuildKernel(F_C_str("addVectors.okl"), & - F_C_str("addVectors"), & - occaDefault) - - ! Launch device kernel - ! Arrays a, b, and ab are now resident on the device - call occaKernelRun(addVectors, occaInt(entries), occaPtr(a), occaPtr(b), occaPtr(ab)) - - ! a and b are const in the kernel, so we can use `dontSync` to manually force - ! a and b to not sync - call occaDontSync(a) - call occaDontSync(b) - - ! Finish work queued up on the device, synchronizing a, b, and ab and making - ! it safe to use them again - call occaFinish() - - ! Assert values - do iu=1,entries - write(*,'(a,i2,a,f3.1)') "ab(", iu, ") = ", ab_ptr(iu) - end do - do iu=1,entries - if (abs(ab_ptr(iu) - (a_ptr(iu) + b_ptr(iu))) > 1.0e-8) stop "*** Wrong result ***" - end do - - ! Free device memory and OCCA objects - call occaFree(addVectors) - call occaFreeUvaPtr(a) - call occaFreeUvaPtr(b) - call occaFreeUvaPtr(ab) - -contains - subroutine print_help() - write(*,'(a, /)') "Example showing how to use background devices, allowing passing of the device implicitly" - write(*,'(a, /)') "command-line options:" - write(*,'(a)') " -v, --verbose Compile kernels in verbose mode" - write(*,'(a)') " -d, --device Device properties (default: ""{mode: 'Serial'}"")" - write(*,'(a)') " -h, --help Print this information and exit" - end subroutine print_help -end program main diff --git a/examples/fortran/11_mpi_unified_memory/CMakeLists.txt b/examples/fortran/11_mpi_unified_memory/CMakeLists.txt deleted file mode 100644 index c21c07442..000000000 --- a/examples/fortran/11_mpi_unified_memory/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -if (MPI_FOUND) - compile_fortran_mpi_example_with_modes(11_mpi_unified_memory main.f90) - - add_custom_target(fortran_example_11_addVectors_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/addVectors.okl addVectors.okl) - add_dependencies(examples_fortran_11_mpi_unified_memory fortran_example_11_addVectors_okl) -endif() diff --git a/examples/fortran/11_mpi_unified_memory/Makefile b/examples/fortran/11_mpi_unified_memory/Makefile deleted file mode 100644 index c18892f6b..000000000 --- a/examples/fortran/11_mpi_unified_memory/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) - -ifndef OCCA_DIR - OCCA_DIR=$(PROJ_DIR)/../../.. -endif -include $(OCCA_DIR)/scripts/build/Makefile - - -#---[ COMPILATION ]------------------------------- -all: main - -%: %.f90 - @mkdir -p $(abspath $(dir $@)) - $(fCompiler) $(fCompilerFlags) -o $@ $^ $(flags) $(fPaths) $(fLinkerFlags) - -clean: - rm -rf $(PROJ_DIR)/main -#================================================= - - -#---[ RUN ]--------------------------------------- -run: main - mpiexec -np 2 $(PROJ_DIR)/main -#================================================= diff --git a/examples/fortran/11_mpi_unified_memory/addVectors.okl b/examples/fortran/11_mpi_unified_memory/addVectors.okl deleted file mode 100644 index ba0dad448..000000000 --- a/examples/fortran/11_mpi_unified_memory/addVectors.okl +++ /dev/null @@ -1,8 +0,0 @@ -@kernel void addVectors(const int entries, - const float *a, - const float *b, - float *ab) { - for (int i = 0; i < entries; ++i; @tile(16, @outer, @inner)) { - ab[i] = a[i] + b[i]; - } -} diff --git a/examples/fortran/11_mpi_unified_memory/main.f90 b/examples/fortran/11_mpi_unified_memory/main.f90 deleted file mode 100644 index cf684a20c..000000000 --- a/examples/fortran/11_mpi_unified_memory/main.f90 +++ /dev/null @@ -1,194 +0,0 @@ -program main - use mpi - use occa - use, intrinsic :: iso_fortran_env, only : stdout=>output_unit, & - stderr=>error_unit - - implicit none - - integer :: ierr, id - integer :: myid, npes, gcomm, tag ! MPI variables - integer, dimension(2) :: request - integer, dimension(MPI_STATUS_SIZE) :: status - integer :: otherID, offset - integer(occaUDim_t) :: iu - integer(occaUDim_t) :: entries = 8 - character(len=1024), dimension(0:1) :: info - real(C_float), dimension(0:1) :: ab_sum - real(C_float) :: ab_gather - - ! OCCA kernel, memory and property objects - type(occaKernel) :: addVectors - type(C_void_ptr) :: a, b, ab - real(C_float), pointer :: a_ptr(:), b_ptr(:), ab_ptr(:) - - ! Initialise MPI - gcomm = MPI_COMM_WORLD - call MPI_Init(ierr) - if (ierr /= MPI_SUCCESS) call stop_mpi("*** MPI_Init error ***") - call MPI_Comm_rank(gcomm, myid, ierr) - call MPI_Comm_size(gcomm, npes, ierr) - if (npes /= 2) then - call stop_mpi("*** Example expects to run with 2 processes ***", myid) - end if - - ! Set OCCA device info - !info = "{mode: 'Serial'}" - !info = "{mode: 'OpenMP', schedule: 'compact', chunk: 2}" - !info(0) = "{mode: 'OpenMP', schedule: 'compact', chunk: 2}" - !info(1) = "{mode: 'CUDA' , device_id: 0}" - info(0) = "{mode: 'OpenMP', schedule: 'compact', chunk: 2}" - info(1) = "{mode: 'OpenCL', platform_id: 0, device_id: 0}" - - ! Print device infos - if(myid == 0) then - call occaPrintModeInfo() - endif - call MPI_Barrier(gcomm, ierr) - - ! Create OCCA device - call occaSetDeviceFromString(F_C_str(info(myid))) - - ! umalloc: [U]nified [M]emory [Alloc]ation - ! Allocate host memory that auto-syncs with the device between before kernel - ! calls and occaFinish() if needed. - a = occaTypedUMalloc(entries, occaDtypeFloat, C_NULL_ptr, occaDefault) - b = occaTypedUMalloc(entries, occaDtypeFloat, C_NULL_ptr, occaDefault) - ab = occaTypedUMalloc(entries, occaDtypeFloat, C_NULL_ptr, occaDefault) - - ! Assign Fortran pointers to the (host) memory - if (C_associated(a)) then - call C_F_pointer(a,a_ptr,[entries]) - else - a_ptr => null() - end if - if (C_associated(b)) then - call C_F_pointer(b,b_ptr,[entries]) - else - b_ptr => null() - end if - if (C_associated(ab)) then - call C_F_pointer(ab,ab_ptr,[entries]) - else - ab_ptr => null() - end if - - ! Initialise host arrays - do iu=1,entries - a_ptr(iu) = real(iu)-1 - b_ptr(iu) = myid-real(iu) - end do - ab_ptr = 0 - - ! Compile the kernel at run-time - addVectors = occaBuildKernel(F_C_str("addVectors.okl"), & - F_C_str("addVectors"), & - occaDefault) - - ! Launch device kernel - ! Arrays a, b, and ab are now resident on the device - call occaKernelRun(addVectors, occaInt(entries), occaPtr(a), occaPtr(b), occaPtr(ab)) - - ! a and b are const in the kernel, so we can use `dontSync` to manually force - ! a and b to not sync - call occaDontSync(a) - call occaDontSync(b) - - ! Finish work queued up on the device, synchronizing a, b, and ab and making - ! it safe to use them again - call occaFinish() - - ! Send/receive the result array - otherID = mod(myid + 1, 2) - offset = int(entries/2) - tag = 123 - request = MPI_REQUEST_NULL - call MPI_IRecv(ab_ptr(otherID*offset+1), & - offset, & - MPI_FLOAT, & - otherID, & - tag, & - gcomm, & - request(1), & - ierr) - call MPI_ISend(ab_ptr(myid*offset+1), & - offset, & - MPI_FLOAT, & - otherID, & - tag, & - gcomm, & - request(2), & - ierr) - call MPI_Wait(request(1), status, ierr) - call MPI_Wait(request(2), status, ierr) - - ! Assert values - call flush(stdout) - ab_sum = myid - ab_gather = sum(ab_ptr) - call MPI_Gather(ab_gather, 1, MPI_FLOAT, ab_sum, 1, MPI_FLOAT, 0, gcomm, ierr) - if (myid == 0) then - if (abs(ab_sum(myid) - ab_sum(otherID)) > 1.0e-8) stop "*** Wrong result ***" - end if - - ! Print values - call flush(stdout) - call MPI_Barrier(gcomm, ierr) - do id=0,npes-1 - if (id == myid) then - call flush(stdout) - do iu=1,entries - write(stdout,'(a,i1,a,i2,a,f5.1)') "#", id, ": ab(", iu, ") = ", ab_ptr(iu) - call flush(stdout) - end do - end if - call MPI_Barrier(gcomm, ierr) - end do - - ! Free device memory and OCCA objects - call occaFree(addVectors) - call occaFreeUvaPtr(a) - call occaFreeUvaPtr(b) - call occaFreeUvaPtr(ab) - - ! Cleanup MPI - call MPI_Finalize(ierr) - if (ierr /= MPI_SUCCESS) call stop_mpi("*** MPI_Finalize error ***", myid) - -contains - subroutine stop_mpi(error, myid, error_code) - implicit none - - integer, intent(in), optional :: myid, error_code - character(len=*), intent(in), optional :: error - integer :: ierr, ec - - call flush(stdout) - call MPI_Barrier(gcomm, ierr) - - if(present(error)) then - if(present(myid)) then - if(myid == 0) then - write(stdout,'(a)') '' - write(stdout,'(a)') error - write(stdout,'(a)') '' - end if - else - write(stdout,'(a)') '' - write(stdout,'(a)') error - write(stdout,'(a)') '' - end if - end if - - call flush(stdout) - call MPI_Barrier(gcomm, ierr) - - if(present(error_code)) then - ec = error_code - else - ec = -1 - end if - - call MPI_Abort(gcomm, ec, ierr) - end subroutine -end program main diff --git a/include/occa.h b/include/occa.h index 22cbc9687..1d6a60929 100644 --- a/include/occa.h +++ b/include/occa.h @@ -8,7 +8,6 @@ #include #include #include -#include // Just in case someone wants to run with an older format than C99 #ifndef OCCA_DISABLE_VARIADIC_MACROS diff --git a/include/occa/c/base.h b/include/occa/c/base.h index d4886cfd4..e61b6e2ab 100644 --- a/include/occa/c/base.h +++ b/include/occa/c/base.h @@ -63,15 +63,6 @@ occaMemory occaTypedMalloc(const occaUDim_t entries, const void *src, occaJson props); -void* occaUMalloc(const occaUDim_t bytes, - const void *src, - occaJson props); - -void* occaTypedUMalloc(const occaUDim_t entries, - const occaDtype type, - const void *src, - occaJson props); - occaMemory occaWrapMemory(const void *ptr, const occaUDim_t bytes, occaJson props); diff --git a/include/occa/c/device.h b/include/occa/c/device.h index a9c0adf63..7f89aeb22 100644 --- a/include/occa/c/device.h +++ b/include/occa/c/device.h @@ -77,17 +77,6 @@ occaMemory occaDeviceTypedMalloc(occaDevice device, const void *src, occaJson props); -void* occaDeviceUMalloc(occaDevice device, - const occaUDim_t bytes, - const void *src, - occaJson props); - -void* occaDeviceTypedUMalloc(occaDevice device, - const occaUDim_t entries, - const occaDtype dtype, - const void *src, - occaJson props); - occaMemory occaDeviceWrapMemory(occaDevice device, const void *ptr, const occaUDim_t bytes, diff --git a/include/occa/c/memory.h b/include/occa/c/memory.h index af1b6da2e..a665f4043 100644 --- a/include/occa/c/memory.h +++ b/include/occa/c/memory.h @@ -20,31 +20,6 @@ occaMemory occaMemorySlice(occaMemory memory, const occaDim_t offset, const occaDim_t bytes); -//---[ UVA ]---------------------------- -bool occaMemoryIsManaged(occaMemory memory); - -bool occaMemoryInDevice(occaMemory memory); - -bool occaMemoryIsStale(occaMemory memory); - -void occaMemoryStartManaging(occaMemory memory); - -void occaMemoryStopManaging(occaMemory memory); - -void occaMemorySyncToDevice(occaMemory memory, - const occaDim_t bytes, - const occaDim_t offset); - -void occaMemorySyncToHost(occaMemory memory, - const occaDim_t bytes, - const occaDim_t offset); -//====================================== - -void occaMemcpy(void *dest, - const void *src, - const occaUDim_t bytes, - occaJson props); - void occaCopyMemToMem(occaMemory dest, occaMemory src, const occaUDim_t bytes, const occaUDim_t destOffset, diff --git a/include/occa/c/uva.h b/include/occa/c/uva.h deleted file mode 100644 index ca1366b0a..000000000 --- a/include/occa/c/uva.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef OCCA_C_UVA_HEADER -#define OCCA_C_UVA_HEADER - -#include -#include - -OCCA_START_EXTERN_C - -bool occaIsManaged(void *ptr); -void occaStartManaging(void *ptr); -void occaStopManaging(void *ptr); - -void occaSyncToDevice(void *ptr, - const occaUDim_t bytes); -void occaSyncToHost(void *ptr, - const occaUDim_t bytes); - -bool occaNeedsSync(void *ptr); -void occaSync(void *ptr); -void occaDontSync(void *ptr); - -void occaFreeUvaPtr(void *ptr); - -OCCA_END_EXTERN_C - -#endif diff --git a/include/occa/core/base.hpp b/include/occa/core/base.hpp index 3327e74c7..84320a350 100644 --- a/include/occa/core/base.hpp +++ b/include/occa/core/base.hpp @@ -75,25 +75,6 @@ namespace occa { const void *src, const occa::json &props); - void* umalloc(const dim_t entries, - const dtype_t &dtype, - const void *src = NULL, - const occa::json &props = occa::json()); - - template - T* umalloc(const dim_t entries, - const void *src = NULL, - const occa::json &props = occa::json()); - - template <> - void* umalloc(const dim_t entries, - const void *src, - const occa::json &props); - - void memcpy(void *dest, const void *src, - const dim_t bytes, - const occa::json &props = json()); - void memcpy(memory dest, const void *src, const dim_t bytes = -1, const dim_t offset = 0, @@ -110,9 +91,6 @@ namespace occa { const dim_t srcOffset = 0, const occa::json &props = json()); - void memcpy(void *dest, const void *src, - const occa::json &props); - void memcpy(memory dest, const void *src, const occa::json &props); diff --git a/include/occa/core/base.tpp b/include/occa/core/base.tpp index d67289c39..fc4ad108f 100644 --- a/include/occa/core/base.tpp +++ b/include/occa/core/base.tpp @@ -6,13 +6,6 @@ namespace occa { return malloc(entries, dtype::get(), src, props); } - template - T* umalloc(const dim_t entries, - const void *src, - const occa::json &props) { - return (T*) umalloc(entries, dtype::get(), src, props); - } - template occa::memory wrapMemory(const T *ptr, const dim_t entries, diff --git a/include/occa/core/device.hpp b/include/occa/core/device.hpp index 28ffc46fc..874116d5c 100644 --- a/include/occa/core/device.hpp +++ b/include/occa/core/device.hpp @@ -221,7 +221,7 @@ namespace occa { * * @endDoc */ - bool isInitialized(); + bool isInitialized() const; modeDevice_t* getModeDevice() const; @@ -635,34 +635,6 @@ namespace occa { const dtype_t &dtype, const occa::json &props); - void* umalloc(const dim_t entries, - const dtype_t &dtype, - const void *src = NULL, - const occa::json &props = occa::json()); - - void* umalloc(const dim_t entries, - const dtype_t &dtype, - const occa::memory src, - const occa::json &props = occa::json()); - - void* umalloc(const dim_t entries, - const dtype_t &dtype, - const occa::json &props); - - template - T* umalloc(const dim_t entries, - const void *src = NULL, - const occa::json &props = occa::json()); - - template - T* umalloc(const dim_t entries, - const occa::memory src, - const occa::json &props = occa::json()); - - template - T* umalloc(const dim_t entries, - const occa::json &props); - /** * @startDoc{wrapMemory} * diff --git a/include/occa/core/device.tpp b/include/occa/core/device.tpp index 9388c668e..f20e12512 100644 --- a/include/occa/core/device.tpp +++ b/include/occa/core/device.tpp @@ -19,26 +19,6 @@ namespace occa { return malloc(entries, occa::dtype::get(), props); } - template - T* device::umalloc(const dim_t entries, - const void *src, - const occa::json &props) { - return (T*) umalloc(entries, dtype::get(), src, props); - } - - template - T* device::umalloc(const dim_t entries, - const occa::memory src, - const occa::json &props) { - return (T*) umalloc(entries, dtype::get(), src, props); - } - - template - T* device::umalloc(const dim_t entries, - const occa::json &props) { - return (T*) umalloc(entries, dtype::get(), props); - } - template <> occa::memory device::wrapMemory(const void *ptr, const dim_t entries, diff --git a/include/occa/core/kernelArg.hpp b/include/occa/core/kernelArg.hpp index 0328feaa1..5156eab25 100644 --- a/include/occa/core/kernelArg.hpp +++ b/include/occa/core/kernelArg.hpp @@ -36,8 +36,6 @@ namespace occa { void* ptr() const; bool isPointer() const; - - void setupForKernelCall(const bool isConst) const; }; //==================================== @@ -71,23 +69,23 @@ namespace occa { } inline virtual void pointerConstructor(void *ptr, const dtype_t &dtype_) { - addPointer(ptr, sizeof(void*), true, false); + addPointer(ptr, sizeof(void*)); } inline virtual void pointerConstructor(const void *ptr, const dtype_t &dtype_) { - addPointer(const_cast(ptr), sizeof(void*), true, false); + addPointer(const_cast(ptr), sizeof(void*)); } OCCA_GENERIC_CLASS_CONSTRUCTORS(kernelArg); template kernelArg(const type2 &arg) { - addPointer((void*) const_cast*>(&arg), sizeof(type2), false); + addPointer((void*) const_cast*>(&arg), sizeof(type2)); } template kernelArg(const type4 &arg) { - addPointer((void*) const_cast*>(&arg), sizeof(type4), false); + addPointer((void*) const_cast*>(&arg), sizeof(type4)); } int size() const; @@ -98,11 +96,8 @@ namespace occa { void add(const kernelArg &arg); - void addPointer(void *arg, - bool lookAtUva = true, bool argIsUva = false); - - void addPointer(void *arg, size_t bytes, - bool lookAtUva = true, bool argIsUva = false); + void addPointer(void *arg); + void addPointer(void *arg, size_t bytes); void addMemory(modeMemory_t *arg); diff --git a/include/occa/core/memory.hpp b/include/occa/core/memory.hpp index 29d26a4b1..3fea0b27f 100644 --- a/include/occa/core/memory.hpp +++ b/include/occa/core/memory.hpp @@ -70,7 +70,6 @@ namespace occa { public: memory(); - memory(void *uvaPtr); memory(modeMemory_t *modeMemory_); memory(const memory &m); @@ -229,23 +228,6 @@ namespace occa { return size() / sizeof(T); } - //---[ UVA ]------------------------ - bool isManaged() const; - bool inDevice() const; - bool isStale() const; - - void setupUva(); - void startManaging(); - void stopManaging(); - - void syncToDevice(const dim_t bytes, const dim_t offset); - void syncToHost(const dim_t bytes, const dim_t offset); - - bool uvaIsStale() const; - void uvaMarkStale(); - void uvaMarkFresh(); - //================================== - /** * @startDoc{operator_equals[0]} * diff --git a/include/occa/utils.hpp b/include/occa/utils.hpp index e1aef6eb8..4f229f336 100644 --- a/include/occa/utils.hpp +++ b/include/occa/utils.hpp @@ -6,6 +6,5 @@ #include #include #include -#include #endif diff --git a/include/occa/utils/uva.hpp b/include/occa/utils/uva.hpp deleted file mode 100644 index 69b429665..000000000 --- a/include/occa/utils/uva.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef OCCA_UTILS_UVA_HEADER -#define OCCA_UTILS_UVA_HEADER - -#include -#include - -#include -#include - -namespace occa { - occa::modeMemory_t* uvaToMemory(void *ptr); - - bool isManaged(void *ptr); - void startManaging(void *ptr); - void stopManaging(void *ptr); - - void syncToDevice(void *ptr, const udim_t bytes = (udim_t) -1); - void syncToHost(void *ptr, const udim_t bytes = (udim_t) -1); - - void syncMemToDevice(occa::modeMemory_t *mem, - const udim_t bytes = (udim_t) -1, - const udim_t offset = 0); - - void syncMemToHost(occa::modeMemory_t *mem, - const udim_t bytes = (udim_t) -1, - const udim_t offset = 0); - - bool needsSync(void *ptr); - void sync(void *ptr); - void dontSync(void *ptr); - - void freeUvaPtr(void *ptr); -} - -#endif diff --git a/src/c/base.cpp b/src/c/base.cpp index 0542504bc..171d8695c 100644 --- a/src/c/base.cpp +++ b/src/c/base.cpp @@ -168,30 +168,6 @@ occaMemory occaTypedMalloc(const occaUDim_t entries, return occa::c::newOccaType(memory); } -void* occaUMalloc(const occaUDim_t bytes, - const void *src, - occaJson props) { - return occaTypedUMalloc(bytes, - occaDtypeByte, - src, - props); -} - -void* occaTypedUMalloc(const occaUDim_t entries, - const occaDtype dtype, - const void *src, - occaJson props) { - const occa::dtype_t &dtype_ = occa::c::dtype(dtype); - - if (occa::c::isDefault(props)) { - return occa::umalloc(entries, dtype_, src); - } - return occa::umalloc(entries, - dtype_, - src, - occa::c::json(props)); -} - occaMemory occaWrapMemory(const void *ptr, const occaUDim_t bytes, occaJson props) { diff --git a/src/c/device.cpp b/src/c/device.cpp index 754a421dd..c6759a364 100644 --- a/src/c/device.cpp +++ b/src/c/device.cpp @@ -219,34 +219,6 @@ occaMemory occaDeviceTypedMalloc(occaDevice device, return occa::c::newOccaType(memory); } -void* occaDeviceUMalloc(occaDevice device, - const occaUDim_t bytes, - const void *src, - occaJson props) { - return occaDeviceTypedUMalloc(device, - bytes, - occaDtypeByte, - src, - props); -} - -void* occaDeviceTypedUMalloc(occaDevice device, - const occaUDim_t entries, - const occaDtype dtype, - const void *src, - occaJson props) { - occa::device device_ = occa::c::device(device); - const occa::dtype_t &dtype_ = occa::c::dtype(dtype); - - if (occa::c::isDefault(props)) { - return device_.umalloc(entries, dtype_, src); - } - return device_.umalloc(entries, - dtype_, - src, - occa::c::json(props)); -} - occaMemory occaDeviceWrapMemory(occaDevice device, const void *ptr, const occaUDim_t bytes, diff --git a/src/c/memory.cpp b/src/c/memory.cpp index cc48ad2ee..ac6cc6f12 100644 --- a/src/c/memory.cpp +++ b/src/c/memory.cpp @@ -37,53 +37,6 @@ occaMemory occaMemorySlice(occaMemory memory, return occa::c::newOccaType(memSlice); } -//---[ UVA ]---------------------------- -bool occaMemoryIsManaged(occaMemory memory) { - return (int) occa::c::memory(memory).isManaged(); -} - -bool occaMemoryInDevice(occaMemory memory) { - return (int) occa::c::memory(memory).inDevice(); -} - -bool occaMemoryIsStale(occaMemory memory) { - return (int) occa::c::memory(memory).isStale(); -} - -void occaMemoryStartManaging(occaMemory memory) { - occa::c::memory(memory).startManaging(); -} - -void occaMemoryStopManaging(occaMemory memory) { - occa::c::memory(memory).stopManaging(); -} - -void occaMemorySyncToDevice(occaMemory memory, - const occaDim_t bytes, - const occaDim_t offset) { - - occa::c::memory(memory).syncToDevice(bytes, offset); -} - -void occaMemorySyncToHost(occaMemory memory, - const occaDim_t bytes, - const occaDim_t offset) { - - occa::c::memory(memory).syncToHost(bytes, offset); -} -//====================================== - -void occaMemcpy(void *dest, const void *src, - const occaUDim_t bytes, - occaJson props) { - if (occa::c::isDefault(props)) { - occa::memcpy(dest, src, bytes); - } else { - occa::memcpy(dest, src, bytes, - occa::c::json(props)); - } -} - void occaCopyMemToMem(occaMemory dest, occaMemory src, const occaUDim_t bytes, const occaUDim_t destOffset, diff --git a/src/c/uva.cpp b/src/c/uva.cpp deleted file mode 100644 index b2eb48278..000000000 --- a/src/c/uva.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include -#include -#include - -OCCA_START_EXTERN_C - -bool occaIsManaged(void *ptr) { - return occa::isManaged(ptr); -} - -void occaStartManaging(void *ptr) { - occa::startManaging(ptr); -} - -void occaStopManaging(void *ptr) { - occa::stopManaging(ptr); -} - -void occaSyncToDevice(void *ptr, - const occaUDim_t bytes) { - occa::syncToDevice(ptr, bytes); -} - -void occaSyncToHost(void *ptr, - const occaUDim_t bytes) { - occa::syncToHost(ptr, bytes); -} - -bool occaNeedsSync(void *ptr) { - return occa::needsSync(ptr); -} - -void occaSync(void *ptr) { - occa::sync(ptr); -} - -void occaDontSync(void *ptr) { - occa::dontSync(ptr); -} - -void occaFreeUvaPtr(void *ptr) { - occa::freeUvaPtr(ptr); -} - -OCCA_END_EXTERN_C diff --git a/src/core/base.cpp b/src/core/base.cpp index cf65df89c..7e5604080 100644 --- a/src/core/base.cpp +++ b/src/core/base.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace occa { //---[ Device Functions ]------------- @@ -118,20 +117,6 @@ namespace occa { return getDevice().malloc(entries, dtype::byte, src, props); } - void* umalloc(const dim_t entries, - const dtype_t &dtype, - const void *src, - const occa::json &props) { - return getDevice().umalloc(entries, dtype, src, props); - } - - template <> - void* umalloc(const dim_t entries, - const void *src, - const occa::json &props) { - return getDevice().umalloc(entries, dtype::byte, src, props); - } - occa::memory wrapMemory(const void *ptr, const dim_t entries, const dtype_t &dtype, @@ -146,54 +131,6 @@ namespace occa { return getDevice().wrapMemory(ptr, entries, dtype::byte, props); } - void memcpy(void *dest, const void *src, - const dim_t bytes, - const occa::json &props) { - - ptrRangeMap::iterator srcIt = uvaMap.find(const_cast(src)); - ptrRangeMap::iterator destIt = uvaMap.find(dest); - - occa::modeMemory_t *srcMem = ((srcIt != uvaMap.end()) ? (srcIt->second) : NULL); - occa::modeMemory_t *destMem = ((destIt != uvaMap.end()) ? (destIt->second) : NULL); - - const udim_t srcOff = (srcMem - ? (((char*) src) - srcMem->uvaPtr) - : 0); - const udim_t destOff = (destMem - ? (((char*) dest) - destMem->uvaPtr) - : 0); - - const bool usingSrcPtr = (!srcMem || - ((srcMem->isManaged() && !srcMem->inDevice()))); - const bool usingDestPtr = (!destMem || - ((destMem->isManaged() && !destMem->inDevice()))); - - if (usingSrcPtr && usingDestPtr) { - udim_t bytes_ = bytes; - if (bytes == -1) { - OCCA_ERROR("Unable to determine bytes to copy", - srcMem || destMem); - bytes_ = (srcMem - ? srcMem->size - : destMem->size); - } - - ::memcpy(dest, src, bytes_); - return; - } - - if (usingSrcPtr) { - destMem->copyFrom(src, bytes, destOff, props); - } else if (usingDestPtr) { - srcMem->copyTo(dest, bytes, srcOff, props); - } else { - // Auto-detects peer-to-peer stuff - occa::memory srcMemory(srcMem); - occa::memory destMemory(destMem); - destMemory.copyFrom(srcMemory, bytes, destOff, srcOff, props); - } - } - void memcpy(memory dest, const void *src, const dim_t bytes, const dim_t offset, @@ -219,11 +156,6 @@ namespace occa { dest.copyFrom(src, bytes, destOffset, srcOffset, props); } - void memcpy(void *dest, const void *src, - const occa::json &props) { - memcpy(dest, src, -1, props); - } - void memcpy(memory dest, const void *src, const occa::json &props) { memcpy(dest, src, -1, 0, props); diff --git a/src/core/device.cpp b/src/core/device.cpp index 891bdd684..4cb536659 100644 --- a/src/core/device.cpp +++ b/src/core/device.cpp @@ -121,7 +121,7 @@ namespace occa { return (modeDevice != other.modeDevice); } - bool device::isInitialized() { + bool device::isInitialized() const { return (modeDevice != NULL); } @@ -233,25 +233,9 @@ namespace occa { } void device::finish() { - if (!modeDevice) { - return; - } - if (modeDevice->hasSeparateMemorySpace()) { - const size_t staleEntries = uvaStaleMemory.size(); - for (size_t i = 0; i < staleEntries; ++i) { - occa::modeMemory_t *mem = uvaStaleMemory[i]; - - mem->copyTo(mem->uvaPtr, mem->size, 0, "async: true"); - - mem->memInfo &= ~uvaFlag::inDevice; - mem->memInfo &= ~uvaFlag::isStale; - } - if (staleEntries) { - uvaStaleMemory.clear(); - } + if (modeDevice) { + modeDevice->finish(); } - - modeDevice->finish(); } bool device::hasSeparateMemorySpace() { @@ -489,53 +473,6 @@ namespace occa { return malloc(entries, dtype::byte, NULL, props); } - void* device::umalloc(const dim_t entries, - const dtype_t &dtype, - const void *src, - const occa::json &props) { - void *ptr = umalloc(entries, dtype, occa::memory(), props); - - if (src && entries) { - const dim_t bytes = entries * dtype.bytes(); - ::memcpy(ptr, src, bytes); - } - - return ptr; - } - - void* device::umalloc(const dim_t entries, - const dtype_t &dtype, - const occa::memory src, - const occa::json &props) { - assertInitialized(); - - if (entries == 0) { - return NULL; - } - - occa::json memProps = memoryProperties(props); - - memory mem = malloc(entries, dtype, src, memProps); - mem.setDtype(dtype); - mem.dontUseRefs(); - mem.setupUva(); - - if (memProps.get("managed", true)) { - mem.startManaging(); - } - void *ptr = mem.modeMemory->uvaPtr; - if (src.size()) { - mem.copyTo(ptr); - } - return ptr; - } - - void* device::umalloc(const dim_t entries, - const dtype_t &dtype, - const occa::json &props) { - return umalloc(entries, dtype, NULL, props); - } - template <> occa::memory device::wrapMemory(const void *ptr, const dim_t entries, diff --git a/src/core/kernel.cpp b/src/core/kernel.cpp index a8dd2cbd1..dd0fc7cab 100644 --- a/src/core/kernel.cpp +++ b/src/core/kernel.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include diff --git a/src/core/kernelArg.cpp b/src/core/kernelArg.cpp index 878826fea..0e1ee35f6 100644 --- a/src/core/kernelArg.cpp +++ b/src/core/kernelArg.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -56,22 +55,6 @@ namespace occa { bool kernelArgData::isPointer() const { return value.isPointer(); } - - void kernelArgData::setupForKernelCall(const bool isConst) const { - if (!modeMemory || - !modeMemory->isManaged() || - !modeMemory->modeDevice->hasSeparateMemorySpace()) { - return; - } - if (!modeMemory->inDevice()) { - modeMemory->copyFrom(modeMemory->uvaPtr, modeMemory->size); - modeMemory->memInfo |= uvaFlag::inDevice; - } - if (!isConst && !modeMemory->isStale()) { - uvaStaleMemory.push_back(modeMemory); - modeMemory->memInfo |= uvaFlag::isStale; - } - } //==================================== //---[ kernelArg ]-------------------- @@ -129,13 +112,11 @@ namespace occa { } } - void kernelArg::addPointer(void *arg, - bool lookAtUva, bool argIsUva) { - addPointer(arg, sizeof(void*), lookAtUva, argIsUva); + void kernelArg::addPointer(void *arg) { + addPointer(arg, sizeof(void*)); } - void kernelArg::addPointer(void *arg, size_t bytes, - bool lookAtUva, bool argIsUva) { + void kernelArg::addPointer(void *arg, size_t bytes) { if (!arg) { args.push_back((primitive) nullptr); return; @@ -143,15 +124,6 @@ namespace occa { modeMemory_t *modeMemory = NULL; - if (argIsUva) { - modeMemory = (modeMemory_t*) arg; - } else if (lookAtUva) { - ptrRangeMap::iterator it = uvaMap.find(arg); - if (it != uvaMap.end()) { - modeMemory = it->second; - } - } - if (modeMemory) { addMemory(modeMemory); } else { diff --git a/src/core/memory.cpp b/src/core/memory.cpp index d95ac261b..54b0d7f20 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -1,26 +1,14 @@ #include #include #include -#include #include #include #include -#include namespace occa { memory::memory() : modeMemory(NULL) {} - memory::memory(void *uvaPtr) : - modeMemory(NULL) { - ptrRangeMap::iterator it = uvaMap.find(uvaPtr); - if (it != uvaMap.end()) { - setModeMemory(it->second); - } else { - setModeMemory((modeMemory_t*) uvaPtr); - } - } - memory::memory(modeMemory_t *modeMemory_) : modeMemory(NULL) { setModeMemory(modeMemory_); @@ -156,131 +144,6 @@ namespace occa { return modeMemory->size / modeMemory->dtype_->bytes(); } - bool memory::isManaged() const { - return (modeMemory && modeMemory->isManaged()); - } - - bool memory::inDevice() const { - return (modeMemory && modeMemory->inDevice()); - } - - bool memory::isStale() const { - return (modeMemory && modeMemory->isStale()); - } - - void memory::setupUva() { - if (!modeMemory) { - return; - } - if ( !(modeMemory->modeDevice->hasSeparateMemorySpace()) ) { - modeMemory->uvaPtr = modeMemory->ptr; - } else { - modeMemory->uvaPtr = (char*) sys::malloc(modeMemory->size); - } - - ptrRange range; - range.start = modeMemory->uvaPtr; - range.end = (range.start + modeMemory->size); - - uvaMap[range] = modeMemory; - modeMemory->modeDevice->uvaMap[range] = modeMemory; - - // Needed for kernelArg.void_ -> modeMemory checks - if (modeMemory->uvaPtr != modeMemory->ptr) { - uvaMap[modeMemory->ptr] = modeMemory; - } - } - - void memory::startManaging() { - if (modeMemory) { - modeMemory->memInfo |= uvaFlag::isManaged; - } - } - - void memory::stopManaging() { - if (modeMemory) { - modeMemory->memInfo &= ~uvaFlag::isManaged; - } - } - - void memory::syncToDevice(const dim_t bytes, - const dim_t offset) { - assertInitialized(); - - udim_t bytes_ = ((bytes == -1) ? modeMemory->size : bytes); - - OCCA_ERROR("Trying to copy negative bytes (" << bytes << ")", - bytes >= -1); - OCCA_ERROR("Cannot have a negative offset (" << offset << ")", - offset >= 0); - - if (bytes_ == 0) { - return; - } - - OCCA_ERROR("Memory has size [" << modeMemory->size << "]," - << " trying to access [" << offset << ", " << (offset + bytes_) << "]", - (bytes_ + offset) <= modeMemory->size); - - if (!modeMemory->modeDevice->hasSeparateMemorySpace()) { - return; - } - - copyFrom(modeMemory->uvaPtr, bytes_, offset); - - modeMemory->memInfo |= uvaFlag::inDevice; - modeMemory->memInfo &= ~uvaFlag::isStale; - - removeFromStaleMap(modeMemory); - } - - void memory::syncToHost(const dim_t bytes, - const dim_t offset) { - assertInitialized(); - - udim_t bytes_ = ((bytes == -1) ? modeMemory->size : bytes); - - OCCA_ERROR("Trying to copy negative bytes (" << bytes << ")", - bytes >= -1); - OCCA_ERROR("Cannot have a negative offset (" << offset << ")", - offset >= 0); - - if (bytes_ == 0) { - return; - } - - OCCA_ERROR("Memory has size [" << modeMemory->size << "]," - << " trying to access [" << offset << ", " << (offset + bytes_) << "]", - (bytes_ + offset) <= modeMemory->size); - - if (!modeMemory->modeDevice->hasSeparateMemorySpace()) { - return; - } - - copyTo(modeMemory->uvaPtr, bytes_, offset); - - modeMemory->memInfo &= ~uvaFlag::inDevice; - modeMemory->memInfo &= ~uvaFlag::isStale; - - removeFromStaleMap(modeMemory); - } - - bool memory::uvaIsStale() const { - return (modeMemory && modeMemory->isStale()); - } - - void memory::uvaMarkStale() { - if (modeMemory != NULL) { - modeMemory->memInfo |= uvaFlag::isStale; - } - } - - void memory::uvaMarkFresh() { - if (modeMemory != NULL) { - modeMemory->memInfo &= ~uvaFlag::isStale; - } - } - bool memory::operator == (const occa::memory &other) const { return (modeMemory == other.modeMemory); } @@ -325,9 +188,6 @@ namespace occa { mm.modeDevice = modeMemory->modeDevice; mm.size = bytes; mm.isOrigin = false; - if (modeMemory->uvaPtr) { - mm.uvaPtr = (modeMemory->uvaPtr + offset_); - } return m; } @@ -491,22 +351,6 @@ namespace occa { if (modeMemory->isOrigin) { modeDevice->bytesAllocated -= (modeMemory->size); - if (modeMemory->uvaPtr) { - void *memPtr = modeMemory->ptr; - void *uvaPtr = modeMemory->uvaPtr; - - uvaMap.erase(uvaPtr); - modeDevice->uvaMap.erase(uvaPtr); - - // CPU case where memory is shared - if (uvaPtr != memPtr) { - uvaMap.erase(memPtr); - modeDevice->uvaMap.erase(memPtr); - - sys::free(uvaPtr); - } - } - if (!freeMemory) { modeMemory->detach(); } diff --git a/src/fortran/occa_base_m.f90 b/src/fortran/occa_base_m.f90 index 5c002ab83..6f17d89c1 100644 --- a/src/fortran/occa_base_m.f90 +++ b/src/fortran/occa_base_m.f90 @@ -164,32 +164,6 @@ type(occaMemory) function occaTypedMalloc(entries, type, src, props) & type(C_void_ptr), value, intent(in) :: src type(occaJson), value :: props end function - - ! void* occaUMalloc(const occaUDim_t bytes, - ! const void *src, - ! occaJson props); - type(C_void_ptr) function occaUMalloc(bytes, src, props) & - bind(C, name="occaUMalloc") - import occaUDim_t, C_void_ptr, occaJson - implicit none - integer(occaUDim_t), value, intent(in) :: bytes - type(C_void_ptr), value, intent(in) :: src - type(occaJson), value :: props - end function - - ! void* occaTypedUMalloc(const occaUDim_t entries, - ! const occaDtype type, - ! const void *src, - ! occaJson props); - type(C_void_ptr) function occaTypedUMalloc(entries, type, src, props) & - bind(C, name="occaTypedUMalloc") - import occaUDim_t, occaDtype, C_void_ptr, occaJson - implicit none - integer(occaUDim_t), value, intent(in) :: entries - type(occaDtype), value, intent(in) :: type - type(C_void_ptr), value, intent(in) :: src - type(occaJson), value :: props - end function ! ====================================== end interface diff --git a/src/fortran/occa_device_m.f90 b/src/fortran/occa_device_m.f90 index 059dd882f..074d3c171 100644 --- a/src/fortran/occa_device_m.f90 +++ b/src/fortran/occa_device_m.f90 @@ -246,40 +246,6 @@ type(occaMemory) function occaDeviceTypedMalloc(device, & type(C_void_ptr), value, intent(in) :: src type(occaJson), value :: props end function - - ! void* occaDeviceUMalloc(occaDevice device, - ! const occaUDim_t bytes, - ! const void *src, - ! occaJson props); - type(C_void_ptr) function occaDeviceUMalloc(device, bytes, src, props) & - bind(C, name="occaDeviceUMalloc") - import C_void_ptr, occaDevice, occaJson, occaUDim_t - implicit none - type(occaDevice), value :: device - integer(occaUDim_t), value, intent(in) :: bytes - type(C_void_ptr), value, intent(in) :: src - type(occaJson), value :: props - end function - - ! void* occaDeviceTypedUMalloc(occaDevice device, - ! const occaUDim_t entries, - ! const occaDtype type, - ! const void *src, - ! occaJson props); - type(C_void_ptr) function occaDeviceTypedUMalloc(device, & - entries, & - dtype, & - src, & - props) & - bind(C, name="occaDeviceTypedUMalloc") - import C_void_ptr, occaDevice, occaJson, occaUDim_t, occaDtype - implicit none - type(occaDevice), value :: device - integer(occaUDim_t), value, intent(in) :: entries - type(occaDtype), value, intent(in) :: dtype - type(C_void_ptr), value, intent(in) :: src - type(occaJson), value :: props - end function ! ====================================== end interface diff --git a/src/fortran/occa_m.f90 b/src/fortran/occa_m.f90 index fc5acb617..ca120dd1b 100644 --- a/src/fortran/occa_m.f90 +++ b/src/fortran/occa_m.f90 @@ -8,7 +8,6 @@ module occa use occa_memory_m use occa_kernel_m use occa_kernelBuilder_m - use occa_uva_m use occa_scope_m use occa_json_m diff --git a/src/fortran/occa_memory_m.f90 b/src/fortran/occa_memory_m.f90 index 1ac298017..afc59067e 100644 --- a/src/fortran/occa_memory_m.f90 +++ b/src/fortran/occa_memory_m.f90 @@ -58,70 +58,6 @@ type(occaMemory) function occaMemorySlice(memory, offset, bytes) & integer(occaDim_t), value, intent(in) :: offset, bytes end function - ! ---[ UVA ]---------------------------- - ! bool occaMemoryIsManaged(occaMemory memory); - logical(kind=C_bool) function occaMemoryIsManaged(memory) & - bind(C, name="occaMemoryIsManaged") - import occaMemory, C_bool - implicit none - type(occaMemory), value :: memory - end function - - ! bool occaMemoryInDevice(occaMemory memory); - logical(kind=C_bool) function occaMemoryInDevice(memory) & - bind(C, name="occaMemoryInDevice") - import occaMemory, C_bool - implicit none - type(occaMemory), value :: memory - end function - - ! bool occaMemoryIsStale(occaMemory memory); - logical(kind=C_bool) function occaMemoryIsStale(memory) & - bind(C, name="occaMemoryIsStale") - import occaMemory, C_bool - implicit none - type(occaMemory), value :: memory - end function - - ! void occaMemoryStartManaging(occaMemory memory); - subroutine occaMemoryStartManaging(memory) & - bind(C, name="occaMemoryStartManaging") - import occaMemory - implicit none - type(occaMemory), value :: memory - end subroutine - - ! void occaMemoryStopManaging(occaMemory memory); - subroutine occaMemoryStopManaging(memory) & - bind(C, name="occaMemoryStopManaging") - import occaMemory - implicit none - type(occaMemory), value :: memory - end subroutine - - ! void occaMemorySyncToDevice(occaMemory memory, - ! const occaDim_t bytes, - ! const occaDim_t offset); - subroutine occaMemorySyncToDevice(memory, bytes, offset) & - bind(C, name="occaMemorySyncToDevice") - import occaMemory, occaDim_t - implicit none - type(occaMemory), value :: memory - integer(occaDim_t), value, intent(in) :: offset, bytes - end subroutine - - ! void occaMemorySyncToHost(occaMemory memory, - ! const occaDim_t bytes, - ! const occaDim_t offset); - subroutine occaMemorySyncToHost(memory, bytes, offset) & - bind(C, name="occaMemorySyncToHost") - import occaMemory, occaDim_t - implicit none - type(occaMemory), value :: memory - integer(occaDim_t), value, intent(in) :: offset, bytes - end subroutine - ! ====================================== - ! void occaMemcpy(void *dest, ! const void *src, ! const occaUDim_t bytes, diff --git a/src/fortran/occa_uva_m.f90 b/src/fortran/occa_uva_m.f90 deleted file mode 100644 index 6ce2e9e30..000000000 --- a/src/fortran/occa_uva_m.f90 +++ /dev/null @@ -1,72 +0,0 @@ -module occa_uva_m - ! occa/c/uva.h - - use occa_types_m - - implicit none - - interface - ! bool occaIsManaged(void *ptr); - logical(kind=C_bool) function occaIsManaged(ptr) & - bind(C, name="occaIsManaged") - import C_void_ptr, C_bool - implicit none - type(C_void_ptr), value :: ptr - end function - ! void occaStartManaging(void *ptr); - subroutine occaStartManaging(ptr) bind(C, name="occaStartManaging") - import C_void_ptr - implicit none - type(C_void_ptr), value :: ptr - end subroutine - ! void occaStopManaging(void *ptr); - subroutine occaStopManaging(ptr) bind(C, name="occaStopManaging") - import C_void_ptr - implicit none - type(C_void_ptr), value :: ptr - end subroutine - - ! void occaSyncToDevice(void *ptr, const occaUDim_t bytes); - subroutine occaSyncToDevice(ptr, bytes) bind(C, name="occaSyncToDevice") - import C_void_ptr, occaUDim_t - implicit none - type(C_void_ptr), value :: ptr - integer(occaUDim_t), value :: bytes - end subroutine - ! void occaSyncToHost(void *ptr, const occaUDim_t bytes); - subroutine occaSyncToHost(ptr, bytes) bind(C, name="occaSyncToHost") - import C_void_ptr, occaUDim_t - implicit none - type(C_void_ptr), value :: ptr - integer(occaUDim_t), value :: bytes - end subroutine - - ! bool occaNeedsSync(void *ptr); - logical(kind=C_bool) function occaNeedsSync(ptr) & - bind(C, name="occaNeedsSync") - import C_void_ptr, C_bool - implicit none - type(C_void_ptr), value :: ptr - end function - ! void occaSync(void *ptr); - subroutine occaSync(ptr) bind(C, name="occaSync") - import C_void_ptr - implicit none - type(C_void_ptr), value :: ptr - end subroutine - ! void occaDontSync(void *ptr); - subroutine occaDontSync(ptr) bind(C, name="occaDontSync") - import C_void_ptr - implicit none - type(C_void_ptr), value :: ptr - end subroutine - - ! void occaFreeUvaPtr(void *ptr); - subroutine occaFreeUvaPtr(ptr) bind(C, name="occaFreeUvaPtr") - import C_void_ptr - implicit none - type(C_void_ptr), value :: ptr - end subroutine - end interface - -end module occa_uva_m diff --git a/src/functional/scope.cpp b/src/functional/scope.cpp index df653074b..b8f8783fa 100644 --- a/src/functional/scope.cpp +++ b/src/functional/scope.cpp @@ -1,5 +1,6 @@ #include +#include #include namespace occa { @@ -47,7 +48,11 @@ namespace occa { } occa::device scope::getDevice() const { - return device; + return ( + device.isInitialized() + ? device + : occa::getDevice() + ); } int scope::size() const { diff --git a/src/occa/internal/c/types.cpp b/src/occa/internal/c/types.cpp index 8a05ce648..7fdfea399 100644 --- a/src/occa/internal/c/types.cpp +++ b/src/occa/internal/c/types.cpp @@ -2,7 +2,6 @@ #include #include -#include namespace occa { namespace c { @@ -398,8 +397,7 @@ namespace occa { switch (value.type) { case occa::c::typeType::ptr: { arg.addPointer(value.value.ptr, - value.bytes, - true, false); + value.bytes); break; } case occa::c::typeType::int8_: { @@ -434,14 +432,12 @@ namespace occa { } case occa::c::typeType::struct_: { arg.addPointer(value.value.ptr, - value.bytes, - false, false); + value.bytes); break; } case occa::c::typeType::string: { arg.addPointer(value.value.ptr, - value.bytes, - false, false); + value.bytes); break; } case occa::c::typeType::memory: { @@ -586,14 +582,6 @@ namespace occa { return occa::c::memory(value).dtype(); case occa::c::typeType::null_: return dtype::void_; - case occa::c::typeType::ptr: { - occa::modeMemory_t* mem = uvaToMemory(value.value.ptr); - if (mem) { - return *(mem->dtype_); - } - OCCA_FORCE_ERROR("Invalid pointer type"); - return dtype::none; - } default: OCCA_FORCE_ERROR("Invalid value type"); return dtype::none; diff --git a/src/occa/internal/core/device.hpp b/src/occa/internal/core/device.hpp index ef3b8fe86..4e8e2e2af 100644 --- a/src/occa/internal/core/device.hpp +++ b/src/occa/internal/core/device.hpp @@ -4,7 +4,6 @@ #include #include #include -#include #include namespace occa { @@ -20,9 +19,6 @@ namespace occa { gc::ring_t streamRing; gc::ring_t streamTagRing; - ptrRangeMap uvaMap; - memoryVector uvaStaleMemory; - stream currentStream; std::vector streams; diff --git a/src/occa/internal/core/kernel.cpp b/src/occa/internal/core/kernel.cpp index aceba9288..cc2ec0bc2 100644 --- a/src/occa/internal/core/kernel.cpp +++ b/src/occa/internal/core/kernel.cpp @@ -88,56 +88,45 @@ namespace occa { && properties.get("type_validation", true) ); - if (validateTypes) { - const int metaArgc = (int) metadata.arguments.size(); - - OCCA_ERROR("(" << hash << ":" << name << ") Kernel expects [" - << metaArgc << "] argument" - << (metaArgc != 1 ? "s," : ",") - << " received [" - << argc << ']', - argc == metaArgc); - - // TODO: Get original arg # - for (int i = 0; i < argc; ++i) { - kernelArgData &arg = arguments[i]; - lang::argMetadata_t &argInfo = metadata.arguments[i]; - - modeMemory_t *mem = arg.getModeMemory(); - const bool isNull = arg.value.isNull(); - const bool isPtr = mem || isNull; - if (isPtr != argInfo.isPtr) { - if (argInfo.isPtr) { - OCCA_FORCE_ERROR("(" << hash << ":" << name << ") Kernel expects an occa::memory for argument [" - << (i + 1) << "]"); - } else { - OCCA_FORCE_ERROR("(" << hash << ":" << name << ") Kernel expects a non-occa::memory type for argument [" - << (i + 1) << "]"); - } - } - - if (!isPtr || isNull) { - continue; - } - - OCCA_ERROR("(" << hash << ":" << name << ") Argument [" << (i + 1) << "] has wrong runtime type.\n" - << "Expected type: " << argInfo.dtype << '\n' - << "Received type: " << *(mem->dtype_) << '\n', - mem->dtype_->canBeCastedTo(argInfo.dtype)); - - arg.setupForKernelCall(argInfo.isConst); - } + if (!validateTypes) { return; } - // Non-OKL kernel setup - // All memory arguments are expected to be non-const for UVA purposes + const int metaArgc = (int) metadata.arguments.size(); + + OCCA_ERROR("(" << hash << ":" << name << ") Kernel expects [" + << metaArgc << "] argument" + << (metaArgc != 1 ? "s," : ",") + << " received [" + << argc << ']', + argc == metaArgc); + + // TODO: Get original arg # for (int i = 0; i < argc; ++i) { kernelArgData &arg = arguments[i]; + lang::argMetadata_t &argInfo = metadata.arguments[i]; + modeMemory_t *mem = arg.getModeMemory(); - if (mem) { - arg.setupForKernelCall(false); + const bool isNull = arg.value.isNull(); + const bool isPtr = mem || isNull; + if (isPtr != argInfo.isPtr) { + if (argInfo.isPtr) { + OCCA_FORCE_ERROR("(" << hash << ":" << name << ") Kernel expects an occa::memory for argument [" + << (i + 1) << "]"); + } else { + OCCA_FORCE_ERROR("(" << hash << ":" << name << ") Kernel expects a non-occa::memory type for argument [" + << (i + 1) << "]"); + } } + + if (!isPtr || isNull) { + continue; + } + + OCCA_ERROR("(" << hash << ":" << name << ") Argument [" << (i + 1) << "] has wrong runtime type.\n" + << "Expected type: " << argInfo.dtype << '\n' + << "Received type: " << *(mem->dtype_) << '\n', + mem->dtype_->canBeCastedTo(argInfo.dtype)); } } diff --git a/src/occa/internal/core/memory.cpp b/src/occa/internal/core/memory.cpp index df654818f..079ee58cd 100644 --- a/src/occa/internal/core/memory.cpp +++ b/src/occa/internal/core/memory.cpp @@ -1,15 +1,12 @@ #include #include -#include namespace occa { modeMemory_t::modeMemory_t(modeDevice_t *modeDevice_, udim_t size_, const occa::json &properties_) : - memInfo(uvaFlag::none), properties(properties_), ptr(NULL), - uvaPtr(NULL), modeDevice(modeDevice_), dtype_(&dtype::byte), size(size_), @@ -49,16 +46,4 @@ namespace occa { bool modeMemory_t::needsFree() const { return memoryRing.needsFree(); } - - bool modeMemory_t::isManaged() const { - return (memInfo & uvaFlag::isManaged); - } - - bool modeMemory_t::inDevice() const { - return (memInfo & uvaFlag::inDevice); - } - - bool modeMemory_t::isStale() const { - return (memInfo & uvaFlag::isStale); - } } diff --git a/src/occa/internal/core/memory.hpp b/src/occa/internal/core/memory.hpp index ed02cd463..883cca3c3 100644 --- a/src/occa/internal/core/memory.hpp +++ b/src/occa/internal/core/memory.hpp @@ -10,13 +10,11 @@ namespace occa { class modeMemory_t : public gc::ringEntry_t { public: - int memInfo; occa::json properties; gc::ring_t memoryRing; char *ptr; - char *uvaPtr; occa::modeDevice_t *modeDevice; @@ -33,10 +31,6 @@ namespace occa { void removeMemoryRef(memory *mem); bool needsFree() const; - bool isManaged() const; - bool inDevice() const; - bool isStale() const; - //---[ Virtual Methods ]------------ virtual ~modeMemory_t() = 0; diff --git a/src/occa/internal/utils/uva.cpp b/src/occa/internal/utils/uva.cpp deleted file mode 100644 index 5bcf8f2f3..000000000 --- a/src/occa/internal/utils/uva.cpp +++ /dev/null @@ -1,88 +0,0 @@ -#include - -#include -#include -#include -#include - -namespace occa { - ptrRangeMap uvaMap; - memoryVector uvaStaleMemory; - - //---[ ptrRange ]--------------------- - ptrRange::ptrRange() : - start(NULL), - end(NULL) {} - - ptrRange::ptrRange(void *ptr, const udim_t bytes) : - start((char*) ptr), - end(((char*) ptr) + bytes) {} - - ptrRange::ptrRange(const ptrRange &other) : - start(other.start), - end(other.end) {} - - ptrRange& ptrRange::operator = (const ptrRange &other) { - start = other.start; - end = other.end; - - return *this; - } - - bool ptrRange::operator == (const ptrRange &other) const { - return ((start < other.end) && - (end > other.start)); - } - - bool ptrRange::operator != (const ptrRange &other) const { - return ((start >= other.end) || - (end <= other.start)); - } - - int operator < (const ptrRange &a, const ptrRange &b) { - return ((a != b) && (a.start < b.start)); - } - - std::ostream& operator << (std::ostream& out, - const ptrRange &range) { - out << '[' - << (void*) range.start << ", " << (void*) range.end - << ']'; - return out; - } - //==================================== - - - //---[ UVA ]-------------------------- - void removeFromStaleMap(void *ptr) { - ptrRangeMap::iterator it = uvaMap.find(ptr); - if (it == uvaMap.end()) { - return; - } - - memory m(it->second); - if (!m.uvaIsStale()) { - return; - } - - removeFromStaleMap(m.getModeMemory()); - } - - void removeFromStaleMap(modeMemory_t *mem) { - if (!mem) { - return; - } - - occa::memory m(mem); - const size_t staleEntries = uvaStaleMemory.size(); - - for (size_t i = 0; i < staleEntries; ++i) { - if (uvaStaleMemory[i] == mem) { - m.uvaMarkFresh(); - uvaStaleMemory.erase(uvaStaleMemory.begin() + i); - break; - } - } - } - //==================================== -} diff --git a/src/occa/internal/utils/uva.hpp b/src/occa/internal/utils/uva.hpp deleted file mode 100644 index 673a59439..000000000 --- a/src/occa/internal/utils/uva.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef OCCA_INTERNAL_UTILS_UVA_HEADER -#define OCCA_INTERNAL_UTILS_UVA_HEADER - -#include - -namespace occa { - namespace uvaFlag { - static const int none = 0; - static const int isManaged = (1 << 0); - static const int inDevice = (1 << 1); - static const int isStale = (1 << 2); - } - - class device; - class memory; - class modeMemory_t; - class ptrRange; - - typedef std::map ptrRangeMap; - typedef std::vector memoryVector; - - extern ptrRangeMap uvaMap; - extern memoryVector uvaStaleMemory; - - //---[ ptrRange ]--------------------- - class ptrRange { - public: - char *start, *end; - - ptrRange(); - ptrRange(void *ptr, const udim_t bytes = 0); - ptrRange(const ptrRange &other); - - ptrRange& operator = (const ptrRange &other); - bool operator == (const ptrRange &other) const; - bool operator != (const ptrRange &other) const; - }; - - int operator < (const ptrRange &a, - const ptrRange &b); - - std::ostream& operator << (std::ostream& out, - const ptrRange &range); - //==================================== - - //---[ UVA ]-------------------------- - void removeFromStaleMap(void *ptr); - void removeFromStaleMap(modeMemory_t *mem); - //==================================== -} - -#endif diff --git a/src/utils/uva.cpp b/src/utils/uva.cpp deleted file mode 100644 index 5031e85ab..000000000 --- a/src/utils/uva.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - -namespace occa { - occa::modeMemory_t* uvaToMemory(void *ptr) { - if (!ptr) { - return NULL; - } - ptrRangeMap::iterator it = uvaMap.find(ptr); - return (it == uvaMap.end()) ? NULL : it->second; - } - - bool isManaged(void *ptr) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - if (mem) { - return (mem->memInfo & uvaFlag::isManaged); - } - return false; - } - - void startManaging(void *ptr) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - if (mem) { - mem->memInfo |= uvaFlag::isManaged; - } - } - - void stopManaging(void *ptr) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - if (mem) { - mem->memInfo &= ~uvaFlag::isManaged; - } - } - - void syncToDevice(void *ptr, const udim_t bytes) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - if (mem) { - syncMemToDevice(mem, bytes, ptrDiff(mem->uvaPtr, ptr)); - } - } - - void syncToHost(void *ptr, const udim_t bytes) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - if (mem) { - syncMemToHost(mem, bytes, ptrDiff(mem->uvaPtr, ptr)); - } - } - - void syncMemToDevice(occa::modeMemory_t *mem, - const udim_t bytes, - const udim_t offset) { - - if (mem) { - occa::memory(mem).syncToDevice(bytes, offset); - } - } - - void syncMemToHost(occa::modeMemory_t *mem, - const udim_t bytes, - const udim_t offset) { - - if (mem) { - occa::memory(mem).syncToHost(bytes, offset); - } - } - - bool needsSync(void *ptr) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - return mem ? mem->isStale() : false; - } - - void sync(void *ptr) { - occa::modeMemory_t *mem = uvaToMemory(ptr); - if (mem) { - if (mem->inDevice()) { - syncMemToHost(mem); - } else { - syncMemToDevice(mem); - } - } - } - - void dontSync(void *ptr) { - removeFromStaleMap(ptr); - } - - void freeUvaPtr(void *ptr) { - if (!ptr) { - return; - } - modeMemory_t *modeMemory = uvaToMemory(ptr); - if (modeMemory) { - occa::memory(modeMemory).free(); - return; - } - OCCA_FORCE_ERROR("Freeing a non-uva pointer"); - } -} diff --git a/tests/files/argKernel.okl b/tests/files/argKernel.okl index 7536d6c05..4171d9e00 100644 --- a/tests/files/argKernel.okl +++ b/tests/files/argKernel.okl @@ -1,6 +1,5 @@ @kernel void argKernel(void *nullPtr, int *mem, - int *uvaPtr, char i8, char u8, short i16, @@ -18,7 +17,6 @@ printf( "nullPtr: %p\n" "mem: %d\n" - "uvaPtr: %d\n" "i8: %d\n" "u8: %d\n" "i16: %d\n" @@ -33,7 +31,6 @@ "str: %s\n", nullPtr, (int) mem[0], - (int) uvaPtr[0], (int) i8, (int) u8, (int) i16, diff --git a/tests/src/c/base.cpp b/tests/src/c/base.cpp index e2654730f..853158620 100644 --- a/tests/src/c/base.cpp +++ b/tests/src/c/base.cpp @@ -83,17 +83,6 @@ void testMemoryMethods() { bytes); occaFree(&mem); - // umalloc - void *ptr = occaUMalloc(bytes, - NULL, - occaDefault); - occaFreeUvaPtr(ptr); - - ptr = occaUMalloc(bytes, - NULL, - props); - occaFreeUvaPtr(ptr); - occaFree(&props); } diff --git a/tests/src/c/device.cpp b/tests/src/c/device.cpp index 7b87fbbcc..2b556f0a4 100644 --- a/tests/src/c/device.cpp +++ b/tests/src/c/device.cpp @@ -116,7 +116,7 @@ void testMemoryMethods() { ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); - // Test malloc + umalloc + // Test malloc occaMemory mem1 = occaDeviceMalloc(device, memBytes, NULL, occaDefault); allocatedBytes += memBytes; @@ -131,15 +131,9 @@ void testMemoryMethods() { ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); - void *ptr1 = occaDeviceUMalloc(device, memBytes, NULL, occaDefault); - allocatedBytes += memBytes; - ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); - void *ptr2 = occaDeviceUMalloc(device, memBytes, NULL, props); - allocatedBytes += memBytes; - ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); @@ -156,16 +150,9 @@ void testMemoryMethods() { ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); - occaFreeUvaPtr(ptr1); - allocatedBytes -= memBytes; - ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); - - occaFreeUvaPtr(ptr2); - allocatedBytes -= memBytes; - ASSERT_EQ((size_t) occaDeviceMemoryAllocated(device), allocatedBytes); diff --git a/tests/src/c/kernel.cpp b/tests/src/c/kernel.cpp index 69e94c24b..dbb719e67 100644 --- a/tests/src/c/kernel.cpp +++ b/tests/src/c/kernel.cpp @@ -109,17 +109,15 @@ void testRun() { int value = 1; occaMemory mem = occaMalloc(1 * sizeof(int), &value, occaDefault); value = 2; - int *uvaPtr = (int*) occaUMalloc(1 * sizeof(int), &value, occaDefault); int xy[2] = {13, 14}; std::string str = "fifteen"; // Good argument types occaKernelRunN( - argKernel, 15, + argKernel, 14, occaNull, mem, - occaPtr(uvaPtr), occaInt8(3), occaUInt8(4), occaInt16(5), @@ -138,7 +136,6 @@ void testRun() { occaKernelClearArgs(argKernel); occaKernelPushArg(argKernel, occaNull); occaKernelPushArg(argKernel, mem); - occaKernelPushArg(argKernel, occaPtr(uvaPtr)); occaKernelPushArg(argKernel, occaInt8(3)); occaKernelPushArg(argKernel, occaUInt8(4)); occaKernelPushArg(argKernel, occaInt16(5)); @@ -154,10 +151,9 @@ void testRun() { occaKernelRunFromArgs(argKernel); // Test array call - occaType args[15] = { + occaType args[14] = { occaNull, mem, - occaPtr(uvaPtr), occaInt8(3), occaUInt8(4), occaInt16(5), @@ -172,7 +168,7 @@ void testRun() { occaString(str.c_str()) }; - occaKernelRunWithArgs(argKernel, 15, args); + occaKernelRunWithArgs(argKernel, 14, args); // Bad argument types ASSERT_THROW( @@ -190,7 +186,4 @@ void testRun() { ASSERT_THROW( occaKernelRunN(argKernel, 1, occaDefault); ); - ASSERT_THROW( - occaKernelRunN(argKernel, 1, uvaPtr); - ); } diff --git a/tests/src/c/memory.cpp b/tests/src/c/memory.cpp index 21f93162d..84f38d3ad 100644 --- a/tests/src/c/memory.cpp +++ b/tests/src/c/memory.cpp @@ -6,12 +6,10 @@ #include void testInit(); -void testUvaMethods(); void testCopyMethods(); int main(const int argc, const char **argv) { testInit(); - testUvaMethods(); testCopyMethods(); return 0; @@ -77,47 +75,6 @@ void testInit() { delete [] data; } -void testUvaMethods() { - // Test with uninitialized memory - occaMemory mem = occaUndefined; - - ASSERT_FALSE(occaMemoryIsManaged(mem)); - ASSERT_FALSE(occaMemoryInDevice(mem)); - ASSERT_FALSE(occaMemoryIsStale(mem)); - - occaMemoryStartManaging(mem); - ASSERT_FALSE(occaMemoryIsManaged(mem)); - - occaMemoryStopManaging(mem); - ASSERT_FALSE(occaMemoryIsManaged(mem)); - - ASSERT_THROW( - occaMemorySyncToDevice(mem, occaAllBytes, 0); - ); - - ASSERT_THROW( - occaMemorySyncToHost(mem, occaAllBytes, 0); - ); - - // Test with memory - mem = occaMalloc(10 * sizeof(int), NULL, occaDefault); - - ASSERT_FALSE(occaMemoryIsManaged(mem)); - ASSERT_FALSE(occaMemoryInDevice(mem)); - ASSERT_FALSE(occaMemoryIsStale(mem)); - - occaMemoryStartManaging(mem); - ASSERT_TRUE(occaMemoryIsManaged(mem)); - - occaMemoryStopManaging(mem); - ASSERT_FALSE(occaMemoryIsManaged(mem)); - - occaMemorySyncToDevice(mem, occaAllBytes, 0); - occaMemorySyncToHost(mem, occaAllBytes, 0); - - occaFree(&mem); -} - void testCopyMethods() { const size_t bytes2 = 2 * sizeof(int); int *data2 = new int[2]; @@ -191,40 +148,7 @@ void testCopyMethods() { occaFree(&mem2); occaFree(&mem4); - // UVA memory copy - int *o_data2 = (int*) occaUMalloc(bytes2, data2, occaDefault); - int *o_data4 = (int*) occaUMalloc(bytes4, data4, occaDefault); - - o_data2[0] = -2; - o_data4[1] = -4; - - occaMemcpy(o_data2, o_data4 + 1, - 1 * sizeof(int), - occaDefault); - - occaMemcpy(data2, o_data2, - occaAllBytes, - props); - - ASSERT_EQ(data2[0], -4); - - o_data2[0] = 1; - occaMemcpy(o_data2, data2, - occaAllBytes, - props); - - ASSERT_EQ(o_data2[0], -4); - - // Unable to find 'all bytes' from 2 non-occa pointers - ASSERT_THROW( - occaMemcpy(data2, data4, - occaAllBytes, - occaDefault); - ); - delete [] data2; delete [] data4; - occaFreeUvaPtr(o_data2); - occaFreeUvaPtr(o_data4); occaFree(&props); } diff --git a/tests/src/core/kernel.cpp b/tests/src/core/kernel.cpp index f725b225f..0cc21db00 100644 --- a/tests/src/core/kernel.cpp +++ b/tests/src/core/kernel.cpp @@ -162,16 +162,12 @@ void testRun() { int value = 1; occa::memory mem = occa::malloc(1, &value); - value = 2; - int *uvaPtr = occa::umalloc(1, &value); - int xy[2] = {13, 14}; std::string str = "fifteen"; argKernel( occa::null, mem, - uvaPtr, (int8_t) 3, (uint8_t) 4, (int16_t) 5, @@ -185,6 +181,4 @@ void testRun() { xy, str.c_str() ); - - occa::freeUvaPtr(uvaPtr); } diff --git a/tests/src/internal/utils/uva.cpp b/tests/src/internal/utils/uva.cpp deleted file mode 100644 index dabd59eab..000000000 --- a/tests/src/internal/utils/uva.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include - -#include -#include - -void testPtrRange(); - -int main(const int argc, const char **argv) { - testPtrRange(); - - return 0; -} - -void testPtrRange() { - occa::ptrRange range = occa::ptrRange(); - ASSERT_EQ(range.start, - (char*) NULL); - ASSERT_EQ(range.end, - (char*) NULL); - - range = occa::ptrRange((void*) 10, - 10); - // [10,20) = [10,20) - ASSERT_EQ(range, - occa::ptrRange((void*) 10, - 10)); - // [10,20) !n [5, 10) - ASSERT_NEQ(range, - occa::ptrRange((void*) 5, - 5)); - // [10,20) !n [20, 30) - ASSERT_NEQ(range, - occa::ptrRange((void*) 20, - 10)); - // [10,20) n [15,25) - ASSERT_EQ(range, - occa::ptrRange((void*) 15, - 10)); - // [10,20) !n [0,5) - ASSERT_NEQ(range, - occa::ptrRange((void*) 0, - 5)); - // [10,20) !n [25,35) - ASSERT_NEQ(range, - occa::ptrRange((void*) 25, - 10)); - - // [10,20) == [11,16) - ASSERT_FALSE(range < occa::ptrRange((void*) 11, - 5)); - // [10,20) == [11,21) - ASSERT_FALSE(range < occa::ptrRange((void*) 11, - 10)); - // [10,20) > [0,5) - ASSERT_FALSE(range < occa::ptrRange((void*) 0, - 5)); - // [10,20) < [21,31) - ASSERT_TRUE(range < occa::ptrRange((void*) 21, - 10)); - - std::cout << "Testing ptrRange output: " << range << '\n'; -} diff --git a/tests/src/utils/uva.cpp b/tests/src/utils/uva.cpp deleted file mode 100644 index b0fd288ef..000000000 --- a/tests/src/utils/uva.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include - -#include - -void testUva(); -void testUvaNull(); - -int main(const int argc, const char **argv) { - testUva(); - testUvaNull(); - - return 0; -} - -void testUva() { - int *ptr = occa::umalloc(10); - - occa::modeMemory_t *modeMemory = occa::uvaToMemory(ptr); - - ASSERT_NEQ(ptr, - (int*) NULL); - ASSERT_TRUE(occa::memory(modeMemory).isInitialized()); - - // Managed - ASSERT_TRUE(occa::isManaged(ptr)); - occa::stopManaging(ptr); - ASSERT_FALSE(occa::isManaged(ptr)); - occa::startManaging(ptr); - ASSERT_TRUE(occa::isManaged(ptr)); - - // Sync - ASSERT_FALSE(occa::needsSync(ptr)); - - occa::syncToDevice(ptr); - occa::syncToHost(ptr); - - occa::syncMemToDevice(modeMemory); - occa::syncMemToHost(modeMemory); - - occa::sync(ptr); - occa::dontSync(ptr); - - occa::freeUvaPtr(ptr); - - ASSERT_EQ(occa::uvaToMemory(ptr), - (occa::modeMemory_t*) NULL); -} - -void testUvaNull() { - int *ptr = new int[2]; - - ASSERT_EQ(occa::uvaToMemory(NULL), - (occa::modeMemory_t*) NULL); - ASSERT_EQ(occa::uvaToMemory(ptr), - (occa::modeMemory_t*) NULL); - - occa::startManaging(NULL); - occa::startManaging(ptr); - - occa::stopManaging(NULL); - occa::stopManaging(ptr); - - occa::syncToDevice(NULL); - occa::syncToDevice(ptr); - - occa::syncToHost(NULL); - occa::syncToHost(ptr); - - occa::syncMemToDevice(NULL); - occa::syncMemToHost(NULL); - - ASSERT_FALSE(occa::needsSync(NULL)); - ASSERT_FALSE(occa::needsSync(ptr)); - - occa::sync(NULL); - occa::sync(ptr); - - occa::dontSync(NULL); - occa::dontSync(ptr); - - occa::freeUvaPtr(NULL); - ASSERT_THROW( - occa::freeUvaPtr(ptr); - ); - - delete [] ptr; -}