Skip to content

[SYCL] Fix memory leak in reduction resources #5162

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion sycl/include/CL/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ class CGExecKernel : public CG {
std::string MKernelName;
detail::OSModuleHandle MOSModuleHandle;
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
std::vector<std::shared_ptr<const void>> MReductionResources;

CGExecKernel(NDRDescT NDRDesc, std::unique_ptr<HostKernelBase> HKernel,
std::shared_ptr<detail::kernel_impl> SyclKernel,
Expand All @@ -259,14 +260,16 @@ class CGExecKernel : public CG {
std::vector<ArgDesc> Args, std::string KernelName,
detail::OSModuleHandle OSModuleHandle,
std::vector<std::shared_ptr<detail::stream_impl>> Streams,
std::vector<std::shared_ptr<const void>> ReductionResources,
CGTYPE Type, detail::code_location loc = {})
: CG(Type, std::move(ArgsStorage), std::move(AccStorage),
std::move(SharedPtrStorage), std::move(Requirements),
std::move(Events), std::move(loc)),
MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
MSyclKernel(std::move(SyclKernel)), MArgs(std::move(Args)),
MKernelName(std::move(KernelName)), MOSModuleHandle(OSModuleHandle),
MStreams(std::move(Streams)) {
MStreams(std::move(Streams)),
MReductionResources(std::move(ReductionResources)) {
assert((getType() == RunOnHostIntel || getType() == Kernel) &&
"Wrong type of exec kernel CG.");
}
Expand All @@ -277,6 +280,10 @@ class CGExecKernel : public CG {
return MStreams;
}

std::vector<std::shared_ptr<const void>> getReductionResources() const {
return MReductionResources;
}

std::shared_ptr<detail::kernel_bundle_impl> getKernelBundle() {
const std::shared_ptr<std::vector<ExtendedMemberT>> &ExtendedMembers =
getExtendedMembers();
Expand All @@ -290,6 +297,7 @@ class CGExecKernel : public CG {
}

void clearStreams() { MStreams.clear(); }
void clearReductionResources() { MReductionResources.clear(); }
};

/// "Copy memory" command group class.
Expand Down
6 changes: 2 additions & 4 deletions sycl/include/CL/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,12 +472,9 @@ class __SYCL_EXPORT handler {
/// Saves buffers created by handling reduction feature in handler.
/// They are then forwarded to command group and destroyed only after
/// the command group finishes the work on device/host.
/// The 'MSharedPtrStorage' suits that need.
///
/// @param ReduObj is a pointer to object that must be stored.
void addReduction(const std::shared_ptr<const void> &ReduObj) {
MSharedPtrStorage.push_back(ReduObj);
}
void addReduction(const std::shared_ptr<const void> &ReduObj);

~handler() = default;

Expand Down Expand Up @@ -1271,6 +1268,7 @@ class __SYCL_EXPORT handler {
}

std::shared_ptr<detail::handler_impl> getHandlerImpl() const;
std::shared_ptr<detail::handler_impl> evictHandlerImpl() const;

void setStateExplicitKernelBundle();
void setStateSpecConstSet();
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/handler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ class handler_impl {
/// equal to the queue associated with the handler if the corresponding
/// submission is a fallback from a previous submission.
std::shared_ptr<queue_impl> MSubmissionSecondaryQueue;

// Protects reduction resources
std::mutex MReductionResourcesMutex;

// Stores additional resources used by reductions.
std::vector<std::shared_ptr<const void>> MReductionResources;
};

} // namespace detail
Expand Down
12 changes: 12 additions & 0 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1307,11 +1307,23 @@ std::vector<StreamImplPtr> ExecCGCommand::getStreams() const {
return {};
}

std::vector<std::shared_ptr<const void>>
ExecCGCommand::getReductionResources() const {
if (MCommandGroup->getType() == CG::Kernel)
return ((CGExecKernel *)MCommandGroup.get())->getReductionResources();
return {};
}

void ExecCGCommand::clearStreams() {
if (MCommandGroup->getType() == CG::Kernel)
((CGExecKernel *)MCommandGroup.get())->clearStreams();
}

void ExecCGCommand::clearReductionResources() {
if (MCommandGroup->getType() == CG::Kernel)
((CGExecKernel *)MCommandGroup.get())->clearReductionResources();
}

cl_int UpdateHostRequirementCommand::enqueueImp() {
waitForPreparedHostEvents();
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,10 @@ class ExecCGCommand : public Command {
ExecCGCommand(std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue);

std::vector<StreamImplPtr> getStreams() const;
std::vector<std::shared_ptr<const void>> getReductionResources() const;

void clearStreams();
void clearReductionResources();

void printDot(std::ostream &Stream) const final;
void emitInstrumentationData() final;
Expand Down
26 changes: 24 additions & 2 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,8 @@ void Scheduler::GraphBuilder::decrementLeafCountersForRecord(

void Scheduler::GraphBuilder::cleanupCommandsForRecord(
MemObjRecord *Record,
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate) {
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
std::vector<std::shared_ptr<const void>> &ReduResourcesToDeallocate) {
std::vector<AllocaCommandBase *> &AllocaCommands = Record->MAllocaCommands;
if (AllocaCommands.empty())
return;
Expand Down Expand Up @@ -1053,10 +1054,20 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(
// Collect stream objects for a visited command.
if (Cmd->getType() == Command::CommandType::RUN_CG) {
auto ExecCmd = static_cast<ExecCGCommand *>(Cmd);

// Transfer ownership of stream implementations.
std::vector<std::shared_ptr<stream_impl>> Streams = ExecCmd->getStreams();
ExecCmd->clearStreams();
StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(),
Streams.end());

// Transfer ownership of reduction resources.
std::vector<std::shared_ptr<const void>> ReduResources =
ExecCmd->getReductionResources();
ExecCmd->clearReductionResources();
ReduResourcesToDeallocate.insert(ReduResourcesToDeallocate.end(),
ReduResources.begin(),
ReduResources.end());
}

for (Command *UserCmd : Cmd->MUsers)
Expand Down Expand Up @@ -1098,7 +1109,8 @@ void Scheduler::GraphBuilder::cleanupCommandsForRecord(

void Scheduler::GraphBuilder::cleanupFinishedCommands(
Command *FinishedCmd,
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate) {
std::vector<std::shared_ptr<stream_impl>> &StreamsToDeallocate,
std::vector<std::shared_ptr<const void>> &ReduResourcesToDeallocate) {
assert(MCmdsToVisit.empty());
MCmdsToVisit.push(FinishedCmd);
MVisitedCmds.clear();
Expand All @@ -1114,10 +1126,20 @@ void Scheduler::GraphBuilder::cleanupFinishedCommands(
// Collect stream objects for a visited command.
if (Cmd->getType() == Command::CommandType::RUN_CG) {
auto ExecCmd = static_cast<ExecCGCommand *>(Cmd);

// Transfer ownership of stream implementations.
std::vector<std::shared_ptr<stream_impl>> Streams = ExecCmd->getStreams();
ExecCmd->clearStreams();
StreamsToDeallocate.insert(StreamsToDeallocate.end(), Streams.begin(),
Streams.end());

// Transfer ownership of reduction resources.
std::vector<std::shared_ptr<const void>> ReduResources =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm currently working on moving away from finished command cleanup towards cleanup after enqueue in order to address some queue::wait related performance issues, see #5070. Since stream handling is currently tied to post wait traversal, kernels with streams still use finished command cleanup in the linked initial implementation. Host tasks are another exception, for a different reason. Once the implementation is fully complete, cleanupFinishedCommands should be dropped entirely.

With this patch CGs with reduction resources would have to be yet another temporary exception waiting for a different solution. So I would really prefer if addressing this didn't involve finished subgraph traversal.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I will convert this to a draft while I think of something.

Copy link

@olegmaslovatintel olegmaslovatintel Feb 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm currently working on moving away from finished command cleanup towards cleanup after enqueue in order to address some queue::wait related performance issues, see #5070

@sergey-semenov @steffenlarsen folks, could you please if this patch is still relevant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With #5070 merged this should be redesigned to work with the new approach. The problem that this PR attempts to solve is still there, but the solution is no longer applicable. I will close the PR but we should still be able to discuss it here.

ExecCmd->getReductionResources();
ExecCmd->clearReductionResources();
ReduResourcesToDeallocate.insert(ReduResourcesToDeallocate.end(),
ReduResources.begin(),
ReduResources.end());
}

for (const DepDesc &Dep : Cmd->MDeps) {
Expand Down
16 changes: 14 additions & 2 deletions sycl/source/detail/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,11 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
// objects, this is needed to guarantee that streamed data is printed and
// resources are released.
std::vector<std::shared_ptr<stream_impl>> StreamsToDeallocate;
// Similar to streams, we also collect the reduction resources used by the
// commands. Cleanup will make sure the commands do not own the resources
// anymore, so we just need them to survive the graph lock then they can die
// as they go out of scope.
std::vector<std::shared_ptr<const void>> ReduResourcesToDeallocate;
{
// Avoiding deadlock situation, where one thread is in the process of
// enqueueing (with a locked mutex) a currently blocked task that waits for
Expand All @@ -239,7 +244,8 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
// The command might have been cleaned up (and set to nullptr) by another
// thread
if (FinishedCmd)
MGraphBuilder.cleanupFinishedCommands(FinishedCmd, StreamsToDeallocate);
MGraphBuilder.cleanupFinishedCommands(FinishedCmd, StreamsToDeallocate,
ReduResourcesToDeallocate);
}
}
deallocateStreams(StreamsToDeallocate);
Expand All @@ -251,6 +257,11 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
// objects, this is needed to guarantee that streamed data is printed and
// resources are released.
std::vector<std::shared_ptr<stream_impl>> StreamsToDeallocate;
// Similar to streams, we also collect the reduction resources used by the
// commands. Cleanup will make sure the commands do not own the resources
// anymore, so we just need them to survive the graph lock then they can die
// as they go out of scope.
std::vector<std::shared_ptr<const void>> ReduResourcesToDeallocate;

{
MemObjRecord *Record = nullptr;
Expand All @@ -272,7 +283,8 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
WriteLockT Lock(MGraphLock, std::defer_lock);
acquireWriteLock(Lock);
MGraphBuilder.decrementLeafCountersForRecord(Record);
MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate);
MGraphBuilder.cleanupCommandsForRecord(Record, StreamsToDeallocate,
ReduResourcesToDeallocate);
MGraphBuilder.removeRecordForMemObj(MemObj);
}
}
Expand Down
6 changes: 4 additions & 2 deletions sycl/source/detail/scheduler/scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,8 @@ class Scheduler {
/// (assuming that all its commands have been waited for).
void cleanupFinishedCommands(
Command *FinishedCmd,
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &);
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &,
std::vector<std::shared_ptr<const void>> &);

/// Reschedules the command passed using Queue provided.
///
Expand All @@ -535,7 +536,8 @@ class Scheduler {
/// Removes commands that use the given MemObjRecord from the graph.
void cleanupCommandsForRecord(
MemObjRecord *Record,
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &);
std::vector<std::shared_ptr<cl::sycl::detail::stream_impl>> &,
std::vector<std::shared_ptr<const void>> &);

/// Removes the MemObjRecord for the memory object passed.
void removeRecordForMemObj(SYCLMemObjI *MemObject);
Expand Down
51 changes: 39 additions & 12 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,24 +48,40 @@ handler::handler(std::shared_ptr<detail::queue_impl> Queue,
MSharedPtrStorage.push_back(std::move(ExtendedMembers));
}

static detail::ExtendedMemberT &getHandlerImplMember(
std::vector<std::shared_ptr<const void>> &SharedPtrStorage) {
assert(!SharedPtrStorage.empty());
std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
detail::convertToExtendedMembers(SharedPtrStorage[0]);
assert(ExtendedMembersVec->size() > 0);
auto &HandlerImplMember = (*ExtendedMembersVec)[0];
assert(detail::ExtendedMembersType::HANDLER_IMPL == HandlerImplMember.MType);
return HandlerImplMember;
}

/// Gets the handler_impl at the start of the extended members.
std::shared_ptr<detail::handler_impl> handler::getHandlerImpl() const {
std::lock_guard<std::mutex> Lock(
detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
return std::static_pointer_cast<detail::handler_impl>(
getHandlerImplMember(MSharedPtrStorage).MData);
}

assert(!MSharedPtrStorage.empty());

std::shared_ptr<std::vector<detail::ExtendedMemberT>> ExtendedMembersVec =
detail::convertToExtendedMembers(MSharedPtrStorage[0]);

assert(ExtendedMembersVec->size() > 0);

auto HandlerImplMember = (*ExtendedMembersVec)[0];
/// Gets the handler_impl at the start of the extended members and removes it.
std::shared_ptr<detail::handler_impl> handler::evictHandlerImpl() const {
std::lock_guard<std::mutex> Lock(
detail::GlobalHandler::instance().getHandlerExtendedMembersMutex());
auto &HandlerImplMember = getHandlerImplMember(MSharedPtrStorage);
auto Impl =
std::static_pointer_cast<detail::handler_impl>(HandlerImplMember.MData);

assert(detail::ExtendedMembersType::HANDLER_IMPL == HandlerImplMember.MType);
// Reset the data of the member.
// NOTE: We let it stay because removing the front can be expensive. This will
// be improved when the impl is made a member of handler. In fact eviction is
// likely to not be needed when that happens.
HandlerImplMember.MData.reset();

return std::static_pointer_cast<detail::handler_impl>(
HandlerImplMember.MData);
return Impl;
}

// Sets the submission state to indicate that an explicit kernel bundle has been
Expand Down Expand Up @@ -220,6 +236,10 @@ event handler::finalize() {
return MLastEvent;
}

// Evict handler_impl from extended members to make sure the command group
// does not keep it alive.
std::shared_ptr<detail::handler_impl> Impl = evictHandlerImpl();

std::unique_ptr<detail::CG> CommandGroup;
switch (type) {
case detail::CG::Kernel:
Expand All @@ -232,7 +252,8 @@ event handler::finalize() {
std::move(MArgsStorage), std::move(MAccStorage),
std::move(MSharedPtrStorage), std::move(MRequirements),
std::move(MEvents), std::move(MArgs), MKernelName, MOSModuleHandle,
std::move(MStreamStorage), MCGType, MCodeLoc));
std::move(MStreamStorage), std::move(Impl->MReductionResources),
MCGType, MCodeLoc));
break;
}
case detail::CG::CodeplayInteropTask:
Expand Down Expand Up @@ -321,6 +342,12 @@ event handler::finalize() {
return MLastEvent;
}

void handler::addReduction(const std::shared_ptr<const void> &ReduObj) {
std::shared_ptr<detail::handler_impl> Impl = getHandlerImpl();
std::lock_guard<std::mutex> Lock(Impl->MReductionResourcesMutex);
Impl->MReductionResources.push_back(ReduObj);
}

void handler::associateWithHandler(detail::AccessorBaseHost *AccBase,
access::target AccTarget) {
detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase);
Expand Down
10 changes: 4 additions & 6 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3940,6 +3940,7 @@ _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE
_ZN2cl4sycl7handler10mem_adviseEPKvmi
_ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb
_ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb
_ZN2cl4sycl7handler12addReductionERKSt10shared_ptrIKvE
_ZN2cl4sycl7handler13getKernelNameB5cxx11Ev
_ZN2cl4sycl7handler17use_kernel_bundleERKNS0_13kernel_bundleILNS0_12bundle_stateE2EEE
_ZN2cl4sycl7handler18RangeRoundingTraceEv
Expand Down Expand Up @@ -4231,12 +4232,12 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65574EEENS3_12param_traitsIS4_XT_
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65575EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65808EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device9getNativeEv
_ZNK2cl4sycl6kernel11get_backendEv
_ZNK2cl4sycl6kernel11get_contextEv
_ZNK2cl4sycl6kernel11get_programEv
_ZNK2cl4sycl6kernel13getNativeImplEv
_ZNK2cl4sycl6kernel9getNativeEv
_ZNK2cl4sycl6kernel17get_kernel_bundleEv
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE16650EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
_ZNK2cl4sycl6kernel18get_sub_group_infoILNS0_4info16kernel_sub_groupE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE
Expand Down Expand Up @@ -4265,6 +4266,7 @@ _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4498EEENS3_12param_traitsIS4_XT_E
_ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4499EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4500EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4501EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6kernel9getNativeEv
_ZNK2cl4sycl6stream22get_max_statement_sizeEv
_ZNK2cl4sycl6stream8get_sizeEv
_ZNK2cl4sycl6streameqERKS1_
Expand Down Expand Up @@ -4306,6 +4308,7 @@ _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4228EEENS3_12param_traitsIS4_XT
_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65552EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl7context9getNativeEv
_ZNK2cl4sycl7handler14getHandlerImplEv
_ZNK2cl4sycl7handler16evictHandlerImplEv
_ZNK2cl4sycl7handler27isStateExplicitKernelBundleEv
_ZNK2cl4sycl7handler30getOrInsertHandlerKernelBundleEb
_ZNK2cl4sycl7program10get_kernelENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Expand Down Expand Up @@ -4405,8 +4408,3 @@ _ZNK2cl4sycl9exception8categoryEv
_ZNK2cl4sycl9kernel_id8get_nameEv
__sycl_register_lib
__sycl_unregister_lib
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131072EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131075EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131074EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131073EEENS3_12param_traitsIS4_XT_EE11return_typeEv
_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65810EEENS3_12param_traitsIS4_XT_EE11return_typeEv
4 changes: 3 additions & 1 deletion sycl/unittests/program_manager/EliminatedArgMask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <detail/handler_impl.hpp>
#include <detail/kernel_bundle_impl.hpp>
#include <detail/queue_impl.hpp>
#include <detail/scheduler/commands.hpp>
Expand Down Expand Up @@ -126,6 +127,7 @@ class MockHandler : public sycl::handler {

std::unique_ptr<sycl::detail::CG> finalize() {
auto CGH = static_cast<sycl::handler *>(this);
std::shared_ptr<sycl::detail::handler_impl> Impl = evictHandlerImpl();
std::unique_ptr<sycl::detail::CG> CommandGroup;
switch (getType()) {
case sycl::detail::CG::Kernel: {
Expand All @@ -136,7 +138,7 @@ class MockHandler : public sycl::handler {
std::move(CGH->MRequirements), std::move(CGH->MEvents),
std::move(CGH->MArgs), std::move(CGH->MKernelName),
std::move(CGH->MOSModuleHandle), std::move(CGH->MStreamStorage),
CGH->MCGType, CGH->MCodeLoc));
std::move(Impl->MReductionResources), CGH->MCGType, CGH->MCodeLoc));
break;
}
default:
Expand Down
Loading