Skip to content

Commit fd0491c

Browse files
[SYCL] Add nested calls detection to shortcut functions (#13659)
Original impl does not cover shortcut functions. This version has thread_local global simple type variable that could track nested call within some queue functions like submit, memset, memcpy and others. Shortcut functions use common part submitMemOpHelper where detection is also added. Reduction impl updated to eliminate nested call we did internally. It is even better since common logic with dependency tracking used in queue methods is not needed there. --------- Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova@intel.com>
1 parent 05d29f3 commit fd0491c

File tree

7 files changed

+138
-74
lines changed

7 files changed

+138
-74
lines changed

sycl/include/sycl/reduction.hpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ using IsReduOptForFastAtomicFetch =
100100
std::bool_constant<false>;
101101
#else
102102
std::bool_constant<((is_sgenfloat_v<T> && sizeof(T) == 4) ||
103-
is_sgeninteger_v<T>)&&IsValidAtomicType<T>::value &&
103+
is_sgeninteger_v<T>) &&
104+
IsValidAtomicType<T>::value &&
104105
(IsPlus<T, BinaryOperation>::value ||
105106
IsMinimum<T, BinaryOperation>::value ||
106107
IsMaximum<T, BinaryOperation>::value ||
@@ -138,11 +139,12 @@ using IsReduOptForFastReduce =
138139
#ifdef SYCL_REDUCTION_DETERMINISTIC
139140
std::bool_constant<false>;
140141
#else
141-
std::bool_constant<(
142-
(is_sgeninteger_v<T> && (sizeof(T) == 4 || sizeof(T) == 8)) ||
143-
is_sgenfloat_v<T>)&&(IsPlus<T, BinaryOperation>::value ||
144-
IsMinimum<T, BinaryOperation>::value ||
145-
IsMaximum<T, BinaryOperation>::value)>;
142+
std::bool_constant<((is_sgeninteger_v<T> &&
143+
(sizeof(T) == 4 || sizeof(T) == 8)) ||
144+
is_sgenfloat_v<T>) &&
145+
(IsPlus<T, BinaryOperation>::value ||
146+
IsMinimum<T, BinaryOperation>::value ||
147+
IsMaximum<T, BinaryOperation>::value)>;
146148
#endif
147149

148150
// std::tuple seems to be a) too heavy and b) not copyable to device now
@@ -835,6 +837,10 @@ using __sycl_init_mem_for =
835837
std::conditional_t<std::is_same_v<KernelName, auto_name>, auto_name,
836838
reduction::InitMemKrn<KernelName>>;
837839

840+
__SYCL_EXPORT void
841+
addCounterInit(handler &CGH, std::shared_ptr<sycl::detail::queue_impl> &Queue,
842+
std::shared_ptr<int> &Counter);
843+
838844
template <typename T, class BinaryOperation, int Dims, size_t Extent,
839845
bool ExplicitIdentity, typename RedOutVar>
840846
class reduction_impl_algo {
@@ -1075,8 +1081,7 @@ class reduction_impl_algo {
10751081
std::shared_ptr<int> Counter(malloc_device<int>(1, q), Deleter);
10761082
CGH.addReduction(Counter);
10771083

1078-
auto Event = q.memset(Counter.get(), 0, sizeof(int));
1079-
CGH.depends_on(Event);
1084+
addCounterInit(CGH, CGH.MQueue, Counter);
10801085

10811086
return Counter.get();
10821087
}

sycl/source/detail/queue_impl.cpp

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@ namespace sycl {
2727
inline namespace _V1 {
2828
namespace detail {
2929
std::atomic<unsigned long long> queue_impl::MNextAvailableQueueID = 0;
30+
thread_local bool NestedCallsDetector = false;
31+
class NestedCallsTracker {
32+
public:
33+
NestedCallsTracker() {
34+
if (NestedCallsDetector)
35+
throw sycl::exception(
36+
make_error_code(errc::invalid),
37+
"Calls to sycl::queue::submit cannot be nested. Command group "
38+
"function objects should use the sycl::handler API instead.");
39+
NestedCallsDetector = true;
40+
}
41+
42+
~NestedCallsTracker() { NestedCallsDetector = false; }
43+
};
3044

3145
static std::vector<sycl::detail::pi::PiEvent>
3246
getPIEvents(const std::vector<sycl::event> &DepEvents) {
@@ -330,6 +344,46 @@ void queue_impl::addSharedEvent(const event &Event) {
330344
MEventsShared.push_back(Event);
331345
}
332346

347+
event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
348+
const std::shared_ptr<queue_impl> &Self,
349+
const std::shared_ptr<queue_impl> &PrimaryQueue,
350+
const std::shared_ptr<queue_impl> &SecondaryQueue,
351+
const detail::code_location &Loc,
352+
const SubmitPostProcessF *PostProcess) {
353+
handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue);
354+
Handler.saveCodeLoc(Loc);
355+
356+
{
357+
NestedCallsTracker tracker;
358+
CGF(Handler);
359+
}
360+
361+
// Scheduler will later omit events, that are not required to execute tasks.
362+
// Host and interop tasks, however, are not submitted to low-level runtimes
363+
// and require separate dependency management.
364+
const CG::CGTYPE Type = Handler.getType();
365+
event Event = detail::createSyclObjFromImpl<event>(
366+
std::make_shared<detail::event_impl>());
367+
368+
if (PostProcess) {
369+
bool IsKernel = Type == CG::Kernel;
370+
bool KernelUsesAssert = false;
371+
372+
if (IsKernel)
373+
// Kernel only uses assert if it's non interop one
374+
KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) &&
375+
ProgramManager::getInstance().kernelUsesAssert(
376+
Handler.MKernelName.c_str());
377+
finalizeHandler(Handler, Event);
378+
379+
(*PostProcess)(IsKernel, KernelUsesAssert, Event);
380+
} else
381+
finalizeHandler(Handler, Event);
382+
383+
addEvent(Event);
384+
return Event;
385+
}
386+
333387
template <typename HandlerFuncT>
334388
event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
335389
const std::vector<event> &DepEvents,
@@ -362,15 +416,19 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
362416
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
363417
ExpandedDepEvents, MContext)) {
364418
if (MSupportsDiscardingPiEvents) {
419+
NestedCallsTracker tracker;
365420
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
366421
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
367422
return createDiscardedEvent();
368423
}
369424

370425
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
371426
auto EventImpl = detail::getSyclObjImpl(ResEvent);
372-
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
373-
&EventImpl->getHandleRef(), EventImpl);
427+
{
428+
NestedCallsTracker tracker;
429+
MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents),
430+
&EventImpl->getHandleRef(), EventImpl);
431+
}
374432

375433
if (MContext->is_host())
376434
return MDiscardEvents ? createDiscardedEvent() : event();

sycl/source/detail/queue_impl.hpp

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -849,53 +849,7 @@ class queue_impl {
849849
const std::shared_ptr<queue_impl> &PrimaryQueue,
850850
const std::shared_ptr<queue_impl> &SecondaryQueue,
851851
const detail::code_location &Loc,
852-
const SubmitPostProcessF *PostProcess) {
853-
// Flag used to detect nested calls to submit and report an error.
854-
thread_local static bool PreventSubmit = false;
855-
856-
if (PreventSubmit) {
857-
throw sycl::exception(
858-
make_error_code(errc::invalid),
859-
"Calls to sycl::queue::submit cannot be nested. Command group "
860-
"function objects should use the sycl::handler API instead.");
861-
}
862-
863-
handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue);
864-
Handler.saveCodeLoc(Loc);
865-
PreventSubmit = true;
866-
try {
867-
CGF(Handler);
868-
} catch (...) {
869-
PreventSubmit = false;
870-
throw;
871-
}
872-
PreventSubmit = false;
873-
874-
// Scheduler will later omit events, that are not required to execute tasks.
875-
// Host and interop tasks, however, are not submitted to low-level runtimes
876-
// and require separate dependency management.
877-
const CG::CGTYPE Type = Handler.getType();
878-
event Event = detail::createSyclObjFromImpl<event>(
879-
std::make_shared<detail::event_impl>());
880-
881-
if (PostProcess) {
882-
bool IsKernel = Type == CG::Kernel;
883-
bool KernelUsesAssert = false;
884-
885-
if (IsKernel)
886-
// Kernel only uses assert if it's non interop one
887-
KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) &&
888-
ProgramManager::getInstance().kernelUsesAssert(
889-
Handler.MKernelName.c_str());
890-
finalizeHandler(Handler, Event);
891-
892-
(*PostProcess)(IsKernel, KernelUsesAssert, Event);
893-
} else
894-
finalizeHandler(Handler, Event);
895-
896-
addEvent(Event);
897-
return Event;
898-
}
852+
const SubmitPostProcessF *PostProcess);
899853

900854
/// Helper function for submitting a memory operation with a handler.
901855
/// \param Self is a shared_ptr to this queue.

sycl/source/detail/reduction.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <detail/config.hpp>
10+
#include <detail/memory_manager.hpp>
1011
#include <detail/queue_impl.hpp>
1112
#include <sycl/reduction.hpp>
1213

@@ -165,6 +166,17 @@ __SYCL_EXPORT size_t reduGetPreferredWGSize(std::shared_ptr<queue_impl> &Queue,
165166
return reduGetMaxWGSize(Queue, LocalMemBytesPerWorkItem);
166167
}
167168

169+
__SYCL_EXPORT void
170+
addCounterInit(handler &CGH, std::shared_ptr<sycl::detail::queue_impl> &Queue,
171+
std::shared_ptr<int> &Counter) {
172+
auto EventImpl = std::make_shared<detail::event_impl>(Queue);
173+
EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context()));
174+
EventImpl->setStateIncomplete();
175+
MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), 0, {},
176+
&EventImpl->getHandleRef(), EventImpl);
177+
CGH.depends_on(createSyclObjFromImpl<event>(EventImpl));
178+
}
179+
168180
} // namespace detail
169181
} // namespace _V1
170182
} // namespace sycl
Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,22 @@
11
// RUN: %{build} -I . -o %t.out
22
// RUN: %{run} %t.out
33

4-
#include <cstdlib>
4+
#include <string>
55
#include <sycl/detail/core.hpp>
6+
#include <sycl/usm.hpp>
67
#include <vector>
78

8-
void nestedSubmit() {
9+
void checkExceptionFields(const sycl::exception &e) {
10+
assert(e.code() == sycl::errc::invalid && "Invalid error code");
11+
assert(std::string(e.what()) ==
12+
"Calls to sycl::queue::submit cannot be nested. Command group "
13+
"function objects should use the sycl::handler API instead." &&
14+
"Invalid e.what() string");
15+
}
16+
17+
void nestedSubmitParallelFor(sycl::queue &q) {
918
uint32_t n = 1024;
1019
std::vector<float> array(n);
11-
sycl::queue q{};
1220
{
1321
sycl::buffer<float> buf(array.data(), sycl::range<1>{n});
1422
q.submit([&](sycl::handler &h) {
@@ -19,16 +27,41 @@ void nestedSubmit() {
1927
}
2028
}
2129

22-
int main() {
30+
void nestedSubmitMemset(sycl::queue &q) {
31+
uint32_t n = 1024;
32+
int *data = sycl::malloc_device<int>(n, q);
33+
try {
34+
q.submit([&](sycl::handler &h) { q.memset(data, 0, n * sizeof(int)); });
35+
} catch (...) {
36+
sycl::free(data, q);
37+
throw;
38+
}
39+
sycl::free(data, q);
40+
}
41+
42+
template <typename CommandSubmitterT>
43+
void test(sycl::queue &Queue, CommandSubmitterT QueueSubmit) {
44+
bool ExceptionHappened = false;
2345
try {
24-
nestedSubmit();
46+
QueueSubmit(Queue);
2547
} catch (const sycl::exception &e) {
26-
assert(e.code() == sycl::errc::invalid && "Invalid error code");
27-
assert(std::string(e.what()) ==
28-
"Calls to sycl::queue::submit cannot be nested. Command group "
29-
"function objects should use the sycl::handler API instead." &&
30-
"Invalid e.what() string");
48+
checkExceptionFields(e);
49+
ExceptionHappened = true;
3150
}
32-
std::cout << "test passed" << std::endl;
51+
assert(ExceptionHappened);
52+
// Checks that queue is in a valid state: nested call tracker was cleaned up
53+
// after exception and does not prevent from submission of new commands.
54+
Queue.submit([&](sycl::handler &h) {});
55+
Queue.wait();
56+
}
57+
58+
int main() {
59+
sycl::queue q{};
60+
test(q, nestedSubmitParallelFor);
61+
// All shortcut functions has a common part where nested call detection
62+
// happens. Testing only one of them is enough.
63+
if (q.get_device().get_info<sycl::info::device::usm_device_allocations>())
64+
test(q, nestedSubmitMemset);
65+
3366
return EXIT_SUCCESS;
3467
}

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3314,6 +3314,7 @@ _ZN4sycl3_V16detail13lgamma_r_implEfPi
33143314
_ZN4sycl3_V16detail13make_platformEmNS0_7backendE
33153315
_ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE
33163316
_ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEERKNS0_7contextE
3317+
_ZN4sycl3_V16detail14addCounterInitERNS0_7handlerERSt10shared_ptrINS1_10queue_implEERS4_IiE
33173318
_ZN4sycl3_V16detail14getBorderColorENS0_19image_channel_orderE
33183319
_ZN4sycl3_V16detail14tls_code_loc_t5queryEv
33193320
_ZN4sycl3_V16detail14tls_code_loc_tC1ERKNS1_13code_locationE

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,12 @@
453453
??$import_external_semaphore@Uexternal_semaphore_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uexternal_semaphore_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVqueue@34@@Z
454454
??$import_external_semaphore@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVdevice@34@AEBVcontext@34@@Z
455455
??$import_external_semaphore@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@experimental@oneapi@ext@_V1@sycl@@YA?AUinterop_semaphore_handle@01234@U?$external_semaphore_descriptor@Uresource_fd@experimental@oneapi@ext@_V1@sycl@@@01234@AEBVqueue@34@@Z
456+
??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z
457+
??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z
458+
??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z
459+
??$update_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$range@$00@45@@Z
460+
??$update_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$range@$01@45@@Z
461+
??$update_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$range@$02@45@@Z
456462
??0AccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z
457463
??0AccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z
458464
??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z
@@ -3932,6 +3938,7 @@
39323938
?add@device_global_map@detail@_V1@sycl@@YAXPEBXPEBD@Z
39333939
?add@host_pipe_map@detail@_V1@sycl@@YAXPEBXPEBD@Z
39343940
?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z
3941+
?addCounterInit@detail@_V1@sycl@@YAXAEAVhandler@23@AEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@AEAV?$shared_ptr@H@6@@Z
39353942
?addGraphLeafDependencies@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXVnode@34567@@Z
39363943
?addHostAccessorAndWait@detail@_V1@sycl@@YAXPEAVAccessorImplHost@123@@Z
39373944
?addHostSampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVSampledImageAccessorImplHost@123@@Z
@@ -4355,12 +4362,6 @@
43554362
?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z
43564363
?get_name@kernel_id@_V1@sycl@@QEBAPEBDXZ
43574364
?get_node_from_event@node@experimental@oneapi@ext@_V1@sycl@@SA?AV123456@Vevent@56@@Z
4358-
??$update_nd_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$00@45@@Z
4359-
??$update_nd_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$01@45@@Z
4360-
??$update_nd_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$nd_range@$02@45@@Z
4361-
??$update_range@$00@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$range@$00@45@@Z
4362-
??$update_range@$01@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$range@$01@45@@Z
4363-
??$update_range@$02@node@experimental@oneapi@ext@_V1@sycl@@QEAAXV?$range@$02@45@@Z
43644365
?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
43654366
?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ
43664367
?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z

0 commit comments

Comments
 (0)