Skip to content

Commit

Permalink
Feature device synchronization (#609)
Browse files Browse the repository at this point in the history
* Adds `finishAll` function to synchronize all streams on a device.
* Adds `stream::finish` to synchronize a specific stream.
* Adds C and Fortran interfaces for both
  • Loading branch information
kris-rowe authored Jul 28, 2022
1 parent de6630f commit d30000b
Show file tree
Hide file tree
Showing 39 changed files with 191 additions and 63 deletions.
75 changes: 58 additions & 17 deletions examples/cpp/07_streams/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,75 @@ int main(int argc, const char **argv) {
ab[i] = 0;
}

occa::kernel addVectors;
occa::memory o_a, o_b, o_ab;
// The default stream
occa::stream streamA = occa::getStream();

// Another, new stream
occa::stream streamB = occa::createStream();

occa::stream streamA, streamB;
occa::memory o_a = occa::malloc<float>(entries);
occa::memory o_b = occa::malloc<float>(entries);
occa::memory o_ab = occa::malloc<float>(entries);

streamA = occa::getStream();
streamB = occa::createStream();

o_a = occa::malloc<float>(entries);
o_b = occa::malloc<float>(entries);
o_ab = occa::malloc<float>(entries);

addVectors = occa::buildKernel("addVectors.okl",
occa::kernel addVectors = occa::buildKernel("addVectors.okl",
"addVectors");

o_a.copyFrom(a);
o_b.copyFrom(b);
// Pass this property to make copies non-blocking on the host.
occa::json async_copy({{"async", true}});

// These copies will be submitted to the current
// stream, which is streamA--the default stream.
o_a.copyFrom(a,async_copy);
o_b.copyFrom(b,async_copy);

// Waits the copies in streamA to complete
streamA.finish();

// **IMPORTANT**
// Operating on overlaping memory regions simultaneously
// from different streams, without appropriate
// synchronization, leads to undefined behavior.

// Create *non-overlapping* memory slices for use by
// kernels launched in separate streams.
occa::memory o_a1 = o_a.slice(0); // First half of a
occa::memory o_a2 = o_a.slice(entries/2); // Second half of a

occa::memory o_b1 = o_b.slice(0);
occa::memory o_b2 = o_b.slice(entries/2);

occa::memory o_ab1 = o_ab.slice(0);
occa::memory o_ab2 = o_ab.slice(entries/2);

occa::setStream(streamA);
addVectors(entries, o_a, o_b, o_ab);
// This kernel launch is submitted to streamA.
// It operates on the first half of each vector.
addVectors(entries/2, o_a1, o_b1, o_ab1);

occa::setStream(streamB);
addVectors(entries, o_a, o_b, o_ab);
// This kernel launch is submitted to streamB.
// It operates on the second half of each vector.
addVectors(entries/2, o_a2, o_b2, o_ab2);

o_ab.copyTo(ab);
// The copy below will be submitted to streamB;
// however, we need to wait for the kernel
// submitted to streamA to finish since the
// entire vector is copied.
streamA.finish();
o_ab.copyTo(ab,async_copy);

for (int i = 0; i < entries; ++i)
// Wait for streamB to finish
streamB.finish();

// Verify the results
for (int i = 0; i < entries; ++i) {
std::cout << i << ": " << ab[i] << '\n';
}
for (int i = 0; i < entries; ++i) {
if (!occa::areBitwiseEqual(ab[i], a[i] + b[i])) {
throw 1;
}
}

delete [] a;
delete [] b;
Expand Down
2 changes: 2 additions & 0 deletions include/occa/c/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ occaUDim_t occaDeviceMemoryAllocated(occaDevice device);

void occaDeviceFinish(occaDevice device);

void occaDeviceFinishAll(occaDevice device);

bool occaDeviceHasSeparateMemorySpace(occaDevice device);

//---[ Stream ]-------------------------
Expand Down
14 changes: 14 additions & 0 deletions include/occa/c/stream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

ifndef OCCA_C_STREAM_HEADER
#define OCCA_C_STREAM_HEADER

#include <occa/c/defines.h>
#include <occa/c/types.h>

OCCA_START_EXTERN_C

void occaStreamFinish(occaStream stream);

OCCA_END_EXTERN_C

#endif
17 changes: 15 additions & 2 deletions include/occa/core/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,13 +349,26 @@ namespace occa {
* @startDoc{finish}
*
* Description:
* Finishes any asynchronous operation queued up on the device, such as
* [[async memory allocations|device.malloc]] or [[kernel calls|kernel.operator_parentheses]].
* Waits for all asynchronous operations, such as
* [[async memory allocations|device.malloc]] or [[kernel calls|kernel.operator_parentheses]],
* submitted to the current stream on this device to complete.
*
* @endDoc
*/
void finish();

/**
* @startDoc{finish}
*
* Description:
* Waits for all asynchronous operations, such as
* [[async memory allocations|device.malloc]] or [[kernel calls|kernel.operator_parentheses]],
* submitted to all streams on this device to complete.
*
* @endDoc
*/
void finishAll();

/**
* @startDoc{hasSeparateMemorySpace}
*
Expand Down
11 changes: 11 additions & 0 deletions include/occa/core/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ namespace occa {
* @endDoc
*/
void free();

/**
* @startDoc{finish}
*
* Description:
* Waits for all asynchronous operations, such as memory allocations
* or kernel calls, submitted to this device to complete.
*
* @endDoc
*/
void finish();
};

std::ostream& operator << (std::ostream &out,
Expand Down
2 changes: 2 additions & 0 deletions scripts/build/Make.fortran_rules
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ $(fObjPath)/occa_device_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_memory_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_kernel_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_kernelBuilder_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_stream_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_uva_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_scope_m.o: $(fObjPath)/occa_types_m.o
$(fObjPath)/occa_json_m.o: $(fObjPath)/occa_types_m.o
Expand All @@ -20,6 +21,7 @@ $(fObjPath)/occa_m.o: $(fObjPath)/fc_string_m.o \
$(fObjPath)/occa_memory_m.o \
$(fObjPath)/occa_kernel_m.o \
$(fObjPath)/occa_kernelBuilder_m.o \
$(fObjPath)/occa_stream_m.o \
$(fObjPath)/occa_uva_m.o \
$(fObjPath)/occa_scope_m.o \
$(fObjPath)/occa_json_m.o
Expand Down
4 changes: 4 additions & 0 deletions src/c/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ void occaDeviceFinish(occaDevice device) {
occa::c::device(device).finish();
}

void occaDeviceFinishAll(occaDevice device) {
occa::c::device(device).finishAll();
}

bool occaDeviceHasSeparateMemorySpace(occaDevice device) {
return (int) occa::c::device(device).hasSeparateMemorySpace();
}
Expand Down
11 changes: 11 additions & 0 deletions src/c/stream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include <occa/internal/c/types.hpp>
#include <occa/c/device.h>
#include <occa/c/dtype.h>

OCCA_START_EXTERN_C

void occaStreamFinish(occaStream stream) {
occa::c::stream(stream).finish();
}

OCCA_END_EXTERN_C
6 changes: 6 additions & 0 deletions src/core/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,12 @@ namespace occa {
}
}

void device::finishAll() {
if (modeDevice) {
modeDevice->finishAll();
}
}

bool device::hasSeparateMemorySpace() {
return (modeDevice &&
modeDevice->hasSeparateMemorySpace());
Expand Down
4 changes: 4 additions & 0 deletions src/core/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ namespace occa {
modeStream = NULL;
}

void stream::finish() {
if(modeStream) modeStream->finish();
}

std::ostream& operator << (std::ostream &out,
const occa::stream &stream) {
out << stream.properties();
Expand Down
7 changes: 7 additions & 0 deletions src/fortran/occa_device_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ subroutine occaDeviceFinish(device) bind(C, name="occaDeviceFinish")
type(occaDevice), value :: device
end subroutine

! void occaDeviceFinishAll(occaDevice device);
subroutine occaDeviceFinishAll(device) bind(C, name="occaDeviceFinishAll")
import occaDevice
implicit none
type(occaDevice), value :: device
end subroutine

! bool occaDeviceHasSeparateMemorySpace(occaDevice device);
logical(kind=C_bool) function occaDeviceHasSeparateMemorySpace(device) &
bind(C, name="occaDeviceHasSeparateMemorySpace")
Expand Down
1 change: 1 addition & 0 deletions src/fortran/occa_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ module occa
use occa_memory_m
use occa_kernel_m
use occa_kernelBuilder_m
use occa_stream_m
use occa_scope_m
use occa_json_m

Expand Down
16 changes: 16 additions & 0 deletions src/fortran/occa_stream_m.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
module occa_stream_m
! occa/c/device.h

use occa_types_m
implicit none

interface
! void occaStreamFinish(occaStream stream);
subroutine occaStreamFinish(stream) bind(C, name="occaStreamFinish")
import occaStream
implicit none
type(occaStream), value :: stream
end subroutine
end interface

end module occa_stream_m
10 changes: 10 additions & 0 deletions src/occa/internal/core/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ namespace occa {
streamTagRing.removeRef(streamTag);
}

void modeDevice_t::finish() const {
currentStream.getModeStream()->finish();
}

void modeDevice_t::finishAll() const {
for(auto* stream : streams) {
if(stream) stream->finish();
}
}

hash_t modeDevice_t::versionedHash() const {
return (occa::hash(settings()["version"])
^ hash());
Expand Down
5 changes: 3 additions & 2 deletions src/occa/internal/core/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ namespace occa {
void addStreamTagRef(modeStreamTag_t *streamTag);
void removeStreamTagRef(modeStreamTag_t *streamTag);

void finish() const;
void finishAll() const;

//---[ Virtual Methods ]------------
virtual ~modeDevice_t() = 0;

virtual void finish() const = 0;

virtual bool hasSeparateMemorySpace() const = 0;

hash_t versionedHash() const;
Expand Down
1 change: 1 addition & 0 deletions src/occa/internal/core/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ namespace occa {

//---[ Virtual Methods ]------------
virtual ~modeStream_t() = 0;
virtual void finish() = 0;
//==================================
};
}
Expand Down
5 changes: 0 additions & 5 deletions src/occa/internal/modes/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,6 @@ namespace occa {
}
}

void device::finish() const {
OCCA_CUDA_ERROR("Device: Finish",
cuStreamSynchronize(getCuStream()));
}

bool device::hasSeparateMemorySpace() const {
return true;
}
Expand Down
2 changes: 0 additions & 2 deletions src/occa/internal/modes/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ namespace occa {
device(const occa::json &properties_);
virtual ~device();

virtual void finish() const;

virtual bool hasSeparateMemorySpace() const;

virtual hash_t hash() const;
Expand Down
5 changes: 5 additions & 0 deletions src/occa/internal/modes/cuda/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,10 @@ namespace occa {
);
}
}

void stream::finish() {
OCCA_CUDA_ERROR("Stream: Finish",
cuStreamSynchronize(cuStream));
}
}
}
1 change: 1 addition & 0 deletions src/occa/internal/modes/cuda/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace occa {
bool isWrapped_=false);

virtual ~stream();
void finish() override;
};
}
}
Expand Down
5 changes: 0 additions & 5 deletions src/occa/internal/modes/dpcpp/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,6 @@ namespace occa
setCompilerLinkerOptions(kernelProps);
}

void device::finish() const
{
getDpcppStream(currentStream).finish();
}

hash_t device::hash() const
{
if (!hash_.initialized)
Expand Down
2 changes: 0 additions & 2 deletions src/occa/internal/modes/dpcpp/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ namespace occa
device(const occa::json &properties_);
virtual ~device() = default;

virtual void finish() const override;

virtual inline bool hasSeparateMemorySpace() const override { return true; }

virtual hash_t hash() const override;
Expand Down
2 changes: 1 addition & 1 deletion src/occa/internal/modes/dpcpp/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace occa {

virtual ~stream()=default;

void finish();
void finish() override;

occa::dpcpp::streamTag memcpy(void *dest, const void *src, occa::udim_t num_bytes);
};
Expand Down
5 changes: 0 additions & 5 deletions src/occa/internal/modes/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,6 @@ namespace occa {

device::~device() { }

void device::finish() const {
OCCA_HIP_ERROR("Device: Finish",
hipStreamSynchronize(getHipStream()));
}

bool device::hasSeparateMemorySpace() const {
return true;
}
Expand Down
2 changes: 0 additions & 2 deletions src/occa/internal/modes/hip/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ namespace occa {
device(const occa::json &properties_);
virtual ~device();

virtual void finish() const;

virtual bool hasSeparateMemorySpace() const;

virtual hash_t hash() const;
Expand Down
Loading

0 comments on commit d30000b

Please sign in to comment.