Skip to content

[SYCL] Add profiling info for host execution #679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions sycl/include/CL/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class NDRDescT {
class HostKernelBase {
public:
// The method executes lambda stored using NDRange passed.
virtual void call(const NDRDescT &NDRDesc) = 0;
virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0;
// Return pointer to the lambda object.
// Used to extract captured variables.
virtual char *getPtr() = 0;
Expand All @@ -149,7 +149,7 @@ class HostKernel : public HostKernelBase {

public:
HostKernel(KernelType Kernel) : MKernel(Kernel) {}
void call(const NDRDescT &NDRDesc) override {
void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override {
// adjust ND range for serial host:
NDRDescT AdjustedRange;
bool Adjust = false;
Expand All @@ -167,7 +167,11 @@ class HostKernel : public HostKernelBase {
Adjust = true;
}
const NDRDescT &R = Adjust ? AdjustedRange : NDRDesc;
if (HPI)
HPI->start();
runOnHost(R);
if (HPI)
HPI->end();
}

char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }
Expand Down
20 changes: 20 additions & 0 deletions sycl/include/CL/sycl/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,26 @@ class context;
namespace detail {
class context_impl;
using ContextImplPtr = std::shared_ptr<cl::sycl::detail::context_impl>;
class queue_impl;

// Profiling info for the host execution.
class HostProfilingInfo {
cl_ulong StartTime = 0;
cl_ulong EndTime = 0;

public:
cl_ulong getStartTime() const { return StartTime; }
cl_ulong getEndTime() const { return EndTime; }

void start();
void end();
};

class event_impl {
public:
event_impl() = default;
event_impl(cl_event CLEvent, const context &SyclContext);
event_impl(std::shared_ptr<cl::sycl::detail::queue_impl> Queue);

// Threat all devices that don't support interoperability as host devices to
// avoid attempts to call method get on such events.
Expand Down Expand Up @@ -65,11 +80,16 @@ class event_impl {

void setCommand(void *Command) { m_Command = Command; }

HostProfilingInfo *getHostProfilingInfo() {
return m_HostProfilingInfo.get();
}

private:
RT::PiEvent m_Event = nullptr;
ContextImplPtr m_Context;
bool m_OpenCLInterop = false;
bool m_HostEvent = true;
std::unique_ptr<HostProfilingInfo> m_HostProfilingInfo;
void *m_Command = nullptr;
};

Expand Down
39 changes: 30 additions & 9 deletions sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <CL/sycl/detail/queue_impl.hpp>
#include <CL/sycl/detail/scheduler/scheduler.hpp>

#include <chrono>

namespace cl {
namespace sycl {
namespace detail {
Expand Down Expand Up @@ -81,6 +83,15 @@ event_impl::event_impl(cl_event CLEvent, const context &SyclContext)
PI_CALL(RT::piEventRetain(m_Event));
}

event_impl::event_impl(std::shared_ptr<cl::sycl::detail::queue_impl> Queue) {
if (Queue->is_host() &&
Queue->has_property<property::queue::enable_profiling>()) {
m_HostProfilingInfo.reset(new HostProfilingInfo());
if (!m_HostProfilingInfo)
throw runtime_error("Out of host memory");
}
}

void event_impl::wait(
std::shared_ptr<cl::sycl::detail::event_impl> Self) const {

Expand Down Expand Up @@ -110,8 +121,9 @@ event_impl::get_profiling_info<info::event_profiling::command_submit>() const {
return get_event_profiling_info<
info::event_profiling::command_submit>::_(this->getHandleRef());
}
assert(!"Not implemented for host device.");
return (cl_ulong)0;
if (!m_HostProfilingInfo)
throw invalid_object_error("Profiling info is not available.");
return m_HostProfilingInfo->getStartTime();
}

template <>
Expand All @@ -121,8 +133,9 @@ event_impl::get_profiling_info<info::event_profiling::command_start>() const {
return get_event_profiling_info<info::event_profiling::command_start>::_(
this->getHandleRef());
}
assert(!"Not implemented for host device.");
return (cl_ulong)0;
if (!m_HostProfilingInfo)
throw invalid_object_error("Profiling info is not available.");
return m_HostProfilingInfo->getStartTime();
}

template <>
Expand All @@ -132,17 +145,17 @@ event_impl::get_profiling_info<info::event_profiling::command_end>() const {
return get_event_profiling_info<info::event_profiling::command_end>::_(
this->getHandleRef());
}
assert(!"Not implemented for host device.");
return (cl_ulong)0;
if (!m_HostProfilingInfo)
throw invalid_object_error("Profiling info is not available.");
return m_HostProfilingInfo->getEndTime();
}

template <> cl_uint event_impl::get_info<info::event::reference_count>() const {
if (!m_HostEvent) {
return get_event_info<info::event::reference_count>::_(
this->getHandleRef());
}
assert(!"Not implemented for host device.");
return (cl_ulong)0;
return 0;
}

template <>
Expand All @@ -152,10 +165,18 @@ event_impl::get_info<info::event::command_execution_status>() const {
return get_event_info<info::event::command_execution_status>::_(
this->getHandleRef());
}
assert(!"Not implemented for host device.");
return info::event_command_status::complete;
}

static uint64_t getTimestamp() {
auto ts = std::chrono::high_resolution_clock::now().time_since_epoch();
return std::chrono::duration_cast<std::chrono::nanoseconds>(ts).count();
}

void HostProfilingInfo::start() { StartTime = getTimestamp(); }

void HostProfilingInfo::end() { EndTime = getTimestamp(); }

} // namespace detail
} // namespace sycl
} // namespace cl
7 changes: 4 additions & 3 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ std::vector<RT::PiEvent> Command::prepareEvents(ContextImplPtr Context) {
Command::Command(CommandType Type, QueueImplPtr Queue, bool UseExclusiveQueue)
: MQueue(std::move(Queue)), MUseExclusiveQueue(UseExclusiveQueue),
MType(Type), MEnqueued(false) {
MEvent.reset(new detail::event_impl());
MEvent.reset(new detail::event_impl(MQueue));
MEvent->setCommand(this);
MEvent->setContextImpl(detail::getSyclObjImpl(MQueue->get_context()));
}
Expand Down Expand Up @@ -550,7 +550,7 @@ void DispatchNativeKernel(void *Blob) {
void **NextArg = (void **)Blob + 1;
for (detail::Requirement *Req : HostTask->MRequirements)
Req->MData = *(NextArg++);
HostTask->MHostKernel->call(HostTask->MNDRDesc);
HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr);
}

cl_int ExecCGCommand::enqueueImp() {
Expand Down Expand Up @@ -702,7 +702,8 @@ cl_int ExecCGCommand::enqueueImp() {
}
if (!RawEvents.empty())
PI_CALL(RT::piEventsWait(RawEvents.size(), &RawEvents[0]));
ExecKernel->MHostKernel->call(NDRDesc);
ExecKernel->MHostKernel->call(NDRDesc,
getEvent()->getHostProfilingInfo());
return CL_SUCCESS;
}

Expand Down
14 changes: 11 additions & 3 deletions sycl/test/basic_tests/event_profiling_info.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// RUN: %clangxx -fsycl %s -o %t.out
//
// Profiling info is not supported on host device so far.
//
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
Expand All @@ -14,6 +13,7 @@
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <cassert>

using namespace cl;

Expand All @@ -25,7 +25,15 @@ int main() {
CGH.single_task<class EmptyKernel>([=]() {});
});

Event.get_profiling_info<sycl::info::event_profiling::command_start>();
auto Submit =
Event.get_profiling_info<sycl::info::event_profiling::command_submit>();
auto Start =
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
auto End =
Event.get_profiling_info<sycl::info::event_profiling::command_end>();

assert(Submit <= Start);
assert(Start <= End);

bool Fail = sycl::info::event_command_status::complete !=
Event.get_info<sycl::info::event::command_execution_status>();
Expand Down