Skip to content

Commit

Permalink
PC sampling: chiplet id + integration test fix (#983)
Browse files Browse the repository at this point in the history
* PCS: show chiplet; cover loading/unloading in integration test

* Use (code_object_id, pc_addr) pair as instruction id.
  • Loading branch information
vlaindic authored Jul 22, 2024
1 parent b8a22e6 commit 0f89f04
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 21 deletions.
2 changes: 2 additions & 0 deletions samples/pc_sampling/pcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
<< "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), "
<< "wave_id: " << std::setw(2) << static_cast<unsigned int>(pc_sample->wave_id)
<< ", "
<< "chiplet: " << std::setw(2) << static_cast<unsigned int>(pc_sample->chiplet)
<< ", "
<< "cu_id: " << pc_sample->hw_id << ", "
<< "correlation: {internal=" << std::setw(7)
<< pc_sample->correlation_id.internal << ", "
Expand Down
6 changes: 5 additions & 1 deletion tests/pc_sampling/address_translation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ KernelObject::KernelObject(uint64_t code_object_id,
uint64_t vaddr = begin_address;
while(vaddr < end_address)
{
auto inst = translator.get(vaddr);
auto inst = translator.get(code_object_id, vaddr);
vaddr += inst->size;
this->add_instruction(std::move(inst));
}
Expand Down Expand Up @@ -143,6 +143,10 @@ dump_flat_profile()
{
_sample_instruction->process([&](const SampleInstruction& sample_instruction) {
ss << sample_instruction.sample_count();
// Each instruction should be visited exactly once.
// Otherwise, code object loading/unloading and relocations
// are not handled properly.
assert(visited_instructions.count(sample_instruction.inst()) == 0);
// Assure that each instruction is counted once.
if(visited_instructions.count(sample_instruction.inst()) == 0)
{
Expand Down
51 changes: 32 additions & 19 deletions tests/pc_sampling/address_translation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,27 @@ namespace address_translation
{
using Instruction = rocprofiler::codeobj::disassembly::Instruction;
using CodeobjAddressTranslate = rocprofiler::codeobj::disassembly::CodeobjAddressTranslate;
using marker_id_t = rocprofiler::codeobj::disassembly::marker_id_t;

/**
* @brief Pair (code_object_id, pc_addr) uniquely identifies an instruction.
*/
struct inst_id_t
{
marker_id_t code_object_id;
uint64_t pc_addr;

bool operator==(const inst_id_t& b) const
{
return this->pc_addr == b.pc_addr && this->code_object_id == b.code_object_id;
};

bool operator<(const inst_id_t& b) const
{
if(this->code_object_id == b.code_object_id) return this->pc_addr < b.pc_addr;
return this->code_object_id < b.code_object_id;
};
};

class KernelObject
{
Expand Down Expand Up @@ -207,8 +228,9 @@ class FlatProfile
{
auto lock = std::unique_lock{mut};

auto inst_id = get_instruction_id(*instruction);
auto itr = samples.find(inst_id);
inst_id_t inst_id = {.code_object_id = instruction->codeobj_id,
.pc_addr = instruction->ld_addr};
auto itr = samples.find(inst_id);
if(itr == samples.end())
{
// Add new instruction
Expand All @@ -225,28 +247,19 @@ class FlatProfile
{
auto lock = std::shared_lock{mut};

auto inst_id = get_instruction_id(inst);
auto itr = samples.find(inst_id);
// TODO: Avoid creating a new instance of `inst_id_t` whenever querying
// sampled instructions.
inst_id_t inst_id = {.code_object_id = inst.codeobj_id, .pc_addr = inst.ld_addr};
auto itr = samples.find(inst_id);
if(itr == samples.end()) return nullptr;
return itr->second.get();
return nullptr;
}

private:
// For the sake of this test, we use `ld_addr` as the instruction identifier.
// TODO: To cover code object loading/unloading and relocations,
// use `(code_object_id + ld_addr)` as the unique identifier.
// This assumes the decoder change to return code_object_id as part
// of the `LoadedCodeobjDecoder::get(uint64_t ld_addr)` method.
using instrution_id_t = uint64_t;
instrution_id_t get_instruction_id(const Instruction& instruction) const
{
// Ensure the decoder determined the `ld_addr`.
assert(instruction.ld_addr > 0);
return instruction.ld_addr;
}

std::unordered_map<instrution_id_t, std::unique_ptr<SampleInstruction>> samples;
mutable std::shared_mutex mut;
// TODO: optimize to use unordered_map
std::map<inst_id_t, std::unique_ptr<SampleInstruction>> samples;
mutable std::shared_mutex mut;
};

std::mutex&
Expand Down
2 changes: 1 addition & 1 deletion tests/pc_sampling/codeobj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ codeobj_tracing_callback(rocprofiler_callback_tracing_record_t record,

// extract symbols from code object
auto& kernel_object_map = client::address_translation::get_kernel_object_map();
auto symbolmap = translator.getSymbolMap();
auto symbolmap = translator.getSymbolMap(data->code_object_id);
for(auto& [vaddr, symbol] : symbolmap)
{
kernel_object_map.add_kernel(
Expand Down
2 changes: 2 additions & 0 deletions tests/pc_sampling/pcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
<< "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), "
<< "wave_id: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->wave_id) << ", "
<< "chiplet: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->chiplet) << ", "
<< "cu_id: " << pc_sample->hw_id << ", "
<< "correlation: {internal=" << std::setw(7)
<< pc_sample->correlation_id.internal << ", "
Expand Down

0 comments on commit 0f89f04

Please sign in to comment.