Skip to content

Commit

Permalink
[PROTON] Roctracer: convert agent id to gpu id for gpu ops (#4090)
Browse files Browse the repository at this point in the history
Roctracer reports (global) agent ids for the location of async ops, e.g.
kernels and copies.
The profiler is better served by zero-based GPU indexes.

Created a mapping function to apply to values stored in
KernelMetric::DeviceId.

Caveat: if devices are hidden using HIP_VISIBLE_DEVICES then the hip
device id, e.g. via hipGetDevice()/hipSetDevice(), will not match the
reported unfiltered id. Additional support in hip will be needed to map
through the filtering correctly.

---------

Co-authored-by: Keren Zhou <robinho364@gmail.com>
  • Loading branch information
mwootton and Jokeren authored Jun 7, 2024
1 parent 107fed4 commit 60613fb
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 1 deletion.
2 changes: 2 additions & 0 deletions third_party/proton/csrc/include/Driver/GPU/HipApi.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ template <bool CheckSuccess>
hipError_t deviceGetAttribute(int *value, hipDeviceAttribute_t attribute,
int deviceId);

// Wrapper bound to the HIP runtime's hipGetDeviceCount (see the matching
// DEFINE_DISPATCH in HipApi.cpp); the device count is written to `count`.
// NOTE(review): CheckSuccess presumably selects whether a non-success status
// is treated as an error -- confirm against Driver/Dispatch.h.
template <bool CheckSuccess> hipError_t getDeviceCount(int *count);

Device getDevice(uint64_t index);

const char *getKernelNameRef(const hipFunction_t f);
Expand Down
23 changes: 23 additions & 0 deletions third_party/proton/csrc/include/Driver/GPU/HsaApi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Declarations of Proton's wrappers around HSA runtime entry points.
// The definitions live in lib/Driver/GPU/HsaApi.cpp.
#ifndef PROTON_DRIVER_GPU_HSA_H_
#define PROTON_DRIVER_GPU_HSA_H_

#include "Driver/Device.h"
#include "hsa/hsa_ext_amd.h"

namespace proton {

namespace hsa {

// Queries `attribute` of `agent`, writing the result to the caller-provided
// buffer `value`. Bound to the runtime symbol hsa_agent_get_info via
// DEFINE_DISPATCH in HsaApi.cpp.
// NOTE(review): CheckSuccess presumably selects whether a non-success status
// is treated as an error -- confirm against Driver/Dispatch.h.
template <bool CheckSuccess>
hsa_status_t agentGetInfo(hsa_agent_t agent, hsa_agent_info_t attribute,
void *value);

// Calls the runtime's hsa_iterate_agents with `callback` and the opaque
// `data` pointer, and returns its status. Returns HSA_STATUS_ERROR_FATAL when
// the hsa_iterate_agents symbol cannot be resolved.
hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent,
void *data),
void *data);

} // namespace hsa

} // namespace proton

#endif // PROTON_DRIVER_GPU_HSA_H_
2 changes: 2 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ DEFINE_DISPATCH(ExternLibHip, deviceSynchronize, hipDeviceSynchronize)
DEFINE_DISPATCH(ExternLibHip, deviceGetAttribute, hipDeviceGetAttribute, int *,
hipDeviceAttribute_t, int);

// Binds hip::getDeviceCount to the HIP runtime symbol hipGetDeviceCount, which
// takes a single `int *` argument.
// NOTE(review): the macro expansion is defined in Driver/Dispatch.h.
DEFINE_DISPATCH(ExternLibHip, getDeviceCount, hipGetDeviceCount, int *);

Device getDevice(uint64_t index) {
int clockRate;
(void)hip::deviceGetAttribute<true>(&clockRate, hipDeviceAttributeClockRate,
Expand Down
35 changes: 35 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include "Driver/GPU/HsaApi.h"
#include "Driver/Dispatch.h"

namespace proton {

namespace hsa {

// Describes the HSA runtime shared library for Dispatch<> / DEFINE_DISPATCH.
struct ExternLibHsa : public ExternLibBase {
// Status type returned by the wrapped HSA entry points.
using RetType = hsa_status_t;
// Shared-object name handed to Dispatch<ExternLibHsa>::init.
static constexpr const char *name = "libhsa-runtime64.so";
// Status value that denotes a successful call.
static constexpr RetType success = HSA_STATUS_SUCCESS;
// Library handle; passed by address to Dispatch<ExternLibHsa>::init and later
// used as the dlsym handle.
static void *lib;
};

// Starts out null until Dispatch<ExternLibHsa>::init fills it in.
void *ExternLibHsa::lib = nullptr;

// Binds hsa::agentGetInfo to the HSA runtime symbol hsa_agent_get_info with
// the argument list (hsa_agent_t, hsa_agent_info_t, void *).
// NOTE(review): the macro expansion is defined in Driver/Dispatch.h.
DEFINE_DISPATCH(ExternLibHsa, agentGetInfo, hsa_agent_get_info, hsa_agent_t,
hsa_agent_info_t, void *);

// Runs `callback` over the HSA agents by forwarding to the runtime's
// hsa_iterate_agents, passing `data` through untouched.
//
// The entry point is looked up with dlsym on the ExternLibHsa handle and the
// resolved pointer is cached for subsequent calls. When the symbol cannot be
// resolved the function reports HSA_STATUS_ERROR_FATAL instead of calling it.
hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent,
                                                    void *data),
                           void *data) {
  using AgentCallback = hsa_status_t (*)(hsa_agent_t, void *);
  using IterateAgentsFn = hsa_status_t (*)(AgentCallback, void *);

  // Resolved lazily; stays null (and is retried on the next call) on failure.
  static IterateAgentsFn iterate = nullptr;

  Dispatch<ExternLibHsa>::init(ExternLibHsa::name, &ExternLibHsa::lib);

  if (!iterate) {
    void *symbol = dlsym(ExternLibHsa::lib, "hsa_iterate_agents");
    iterate = reinterpret_cast<IterateAgentsFn>(symbol);
  }

  if (!iterate)
    return HSA_STATUS_ERROR_FATAL;
  return iterate(callback, data);
}

} // namespace hsa

} // namespace proton
32 changes: 31 additions & 1 deletion third_party/proton/csrc/lib/Profiler/RoctracerProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "Context/Context.h"
#include "Data/Metric.h"
#include "Driver/GPU/HipApi.h"
#include "Driver/GPU/HsaApi.h"
#include "Driver/GPU/RoctracerApi.h"

#include "hip/amd_detail/hip_runtime_prof.h"
Expand All @@ -26,6 +27,34 @@ thread_local GPUProfiler<RoctracerProfiler>::ProfilerState

namespace {

// Node to device id mapping.
//
// Roctracer reports the agent's driver node id as the device id of an async
// op, while the profiler wants a zero-based gpu index. deviceOffset holds the
// smallest driver node id seen on a GPU agent; subtracting it from a reported
// id yields the index. It keeps the sentinel value kUnknownDeviceOffset until
// createDeviceMap() has found at least one GPU agent.
constexpr int kUnknownDeviceOffset = 0x7fffffff;
int deviceOffset = kUnknownDeviceOffset;

// Scans all HSA agents and records the smallest driver node id that belongs to
// a GPU agent in deviceOffset. Safe to call repeatedly: the stored offset can
// only decrease.
void createDeviceMap() {
  int dc = 0;
  // The count itself is not used.
  // NOTE(review): this call presumably ensures the HIP runtime is initialised
  // before the agents are enumerated -- confirm before removing it.
  (void)hip::getDeviceCount<true>(&dc);
  hsa::iterateAgents(
      [](hsa_agent_t agent, void *data) {
        auto &minGpuNode = *static_cast<int *>(data);

        // The HSA AMD extension documents this attribute as a uint32_t.
        uint32_t nodeId = 0;
        hsa::agentGetInfo<true>(
            agent,
            static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
            &nodeId);

        // Initialised to a value that is not a GPU so that an unwritten result
        // can never be mistaken for one.
        int deviceType = -1;
        hsa::agentGetInfo<true>(
            agent, static_cast<hsa_agent_info_t>(HSA_AGENT_INFO_DEVICE),
            &deviceType);

        const int node = static_cast<int>(nodeId);
        if ((deviceType == HSA_DEVICE_TYPE_GPU) && (node < minGpuNode))
          minGpuNode = node;

        // Always continue the iteration so every agent is inspected.
        return HSA_STATUS_SUCCESS;
      },
      &deviceOffset);
}

// Converts a roctracer-reported device id into a zero-based gpu index.
//
// When no GPU agent has been discovered (createDeviceMap() was not run, or the
// agent iteration failed), the id is returned unchanged rather than being
// offset by the sentinel, which would produce a meaningless value.
int mapDeviceId(int id) {
  if (deviceOffset == kUnknownDeviceOffset)
    return id;
  return id - deviceOffset;
}

std::shared_ptr<Metric>
convertActivityToMetric(const roctracer_record_t *activity) {
std::shared_ptr<Metric> metric;
Expand All @@ -34,7 +63,7 @@ convertActivityToMetric(const roctracer_record_t *activity) {
metric = std::make_shared<KernelMetric>(
static_cast<uint64_t>(activity->begin_ns),
static_cast<uint64_t>(activity->end_ns), 1,
static_cast<uint64_t>(activity->device_id),
static_cast<uint64_t>(mapDeviceId(activity->device_id)),
static_cast<uint64_t>(DeviceType::HIP));
break;
}
Expand Down Expand Up @@ -306,6 +335,7 @@ void RoctracerProfiler::RoctracerProfilerPimpl::doStop() {

// Constructs the profiler: creates the pimpl bound to this instance, then
// computes the node-id offset that mapDeviceId() uses.
RoctracerProfiler::RoctracerProfiler() {
pImpl = std::make_unique<RoctracerProfilerPimpl>(*this);
// Populate deviceOffset so reported device ids can be mapped to gpu indexes.
createDeviceMap();
}

RoctracerProfiler::~RoctracerProfiler() = default;
Expand Down

0 comments on commit 60613fb

Please sign in to comment.