Skip to content

Commit

Permalink
[WIP] Review fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
mrowan137 committed Apr 2, 2020
1 parent 5ee7327 commit 6a2d004
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 19 deletions.
6 changes: 2 additions & 4 deletions Src/Base/AMReX_CuptiTrace.H
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>
#include <cuda.h>
#include <cupti.h>
#include <memory>


namespace amrex {
Expand All @@ -22,7 +23,6 @@ void cuptiTraceStop (unsigned boxUintID) noexcept;

class CUpti_Activity_Userdata {
private:
//CUpti_Activity* record_;
unsigned uintID_;
std::string charID_;
unsigned long long startTime_;
Expand All @@ -32,15 +32,13 @@ private:
std::string name_;

public:
//void setRecord (CUpti_Activity*) noexcept;
void setUintID (unsigned) noexcept;
void setCharID (std::string) noexcept;
void setStartTime (unsigned long long) noexcept;
void setEndTime (unsigned long long) noexcept;
void setTimeElapsed (unsigned long long) noexcept;
void setStreamID (int) noexcept;
void setName (std::string) noexcept;
CUpti_Activity* getRecord () noexcept;
unsigned getUintID () noexcept;
std::string getCharID () noexcept;
unsigned long long getStartTime () noexcept;
Expand All @@ -60,7 +58,7 @@ public:
};

extern std::vector<std::unique_ptr<CUpti_Activity_Userdata>> activityRecordUserdata;
double computeElapsedTimeUserdata(std::vector<std::unique_ptr<CUpti_Activity_Userdata>>
double computeElapsedTimeUserdata(const std::vector<std::unique_ptr<CUpti_Activity_Userdata>>&
activityRecordUserdata) noexcept;
}

Expand Down
22 changes: 11 additions & 11 deletions Src/Base/AMReX_CuptiTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
#ifdef AMREX_USE_CUPTI
#include <AMReX_Print.H>
#include <stdio.h>
#include <map>
#include <cuda.h>
#include <cupti.h>
#include <map>
#include <memory>

// CUPTI buffer size, enough for 4096 activity records in a single buffer;
// `CUpti_Activity` objects are 8 bytes long
Expand Down Expand Up @@ -54,17 +55,15 @@ bfrCompleteCallback (CUcontext ctx, uint32_t streamId, uint8_t* bfr,
if (status == CUPTI_SUCCESS) {
std::unique_ptr<CUpti_Activity_Userdata> recordUserData;
recordUserData.reset(new CUpti_Activity_Userdata());
std::unique_ptr<CUpti_Activity_Userdata4> kernel;
kernel.reset( (CUpti_ActivityKernel4*) record);
CUpti_ActivityKernel4* kernel = (CUpti_ActivityKernel4*) record;

// Save record data
//recordUserData->setRecord(record);
recordUserData->setStartTime(kernel->start);
recordUserData->setEndTime(kernel->end);
recordUserData->setTimeElapsed(kernel->end - kernel->start);
recordUserData->setStreamID(kernel->streamId);
recordUserData->setName((std::string)kernel->name);
activityRecordUserdata.push_back(recordUserData);
activityRecordUserdata.push_back( std::move(recordUserData) );
}
else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
// No more records in the buffer
Expand All @@ -84,8 +83,9 @@ bfrCompleteCallback (CUcontext ctx, uint32_t streamId, uint8_t* bfr,
<< " activity records were dropped due to insufficient buffer space\n";
}
}
free(record);
free(bfr);
record = NULL; // Cleanup dangling pointers
kernel = NULL;
}

void
Expand Down Expand Up @@ -121,14 +121,14 @@ cuptiTraceStop (unsigned boxUintID) noexcept
{
cudaDeviceSynchronize();
cuptiActivityFlushAll(0);
for (auto record : activityRecordUserdata) {
for (auto& record : activityRecordUserdata) {
record->setUintID(boxUintID);
record->setCharID( ((std::string) "CharID_") + ((std::string) boxUintID) );
record->setCharID( ((std::string) "CharID_") + std::to_string(boxUintID) );
}
}

double
computeElapsedTimeUserdata (std::vector<std::unique_ptr<CUpti_Activity_Userdata>>
computeElapsedTimeUserdata (const std::vector<std::unique_ptr<CUpti_Activity_Userdata>>&
activityRecordUserdata) noexcept
{
if (activityRecordUserdata.size() == 0) {
Expand All @@ -138,7 +138,7 @@ computeElapsedTimeUserdata (std::vector<std::unique_ptr<CUpti_Activity_Userdata>
std::map<int, unsigned long long> streamIDToElapsedTimeMap;

// Initialize tally of unique streams
for (auto record : activityRecordUserdata) {
for (auto& record : activityRecordUserdata) {
if (streamIDToElapsedTimeMap.find(record->getStreamID())
== streamIDToElapsedTimeMap.end()) {
// Not found
Expand All @@ -149,7 +149,7 @@ computeElapsedTimeUserdata (std::vector<std::unique_ptr<CUpti_Activity_Userdata>
}

// Sum kernel times in each stream
for (auto record : activityRecordUserdata) {
for (auto& record : activityRecordUserdata) {
streamIDToElapsedTimeMap[record->getStreamID()] += record->getTimeElapsed();
}

Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_TinyProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ TinyProfiler::stop (unsigned boxUintID) noexcept
t = computeElapsedTimeUserdata(activityRecordUserdata);
int nKernelCalls = activityRecordUserdata.size();

for (auto record : activityRecordUserdata)
for (auto& record : activityRecordUserdata)
{
record->setUintID(boxUintID);
}
Expand Down
6 changes: 3 additions & 3 deletions Tests/GPU/CuptiTest/Exec/CUDA/myfunc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ void doDeviceSleep (MultiFab& mf, int& n) {

unsigned long long t_start = 0;
unsigned long long t_stop = 0;
for (auto record : activityRecordUserdata) {
for (auto& record : activityRecordUserdata) {
//std::unique_ptr<CUpti_ActivityKernel4> kernel;
//kernel.reset(record->getRecord());
t_start = (unsigned long long) record->getStartTime;//kernel->start;
t_stop = (unsigned long long) record->getEndTime;//kernel->end;
t_start = (unsigned long long) record->getStartTime();//kernel->start;
t_stop = (unsigned long long) record->getEndTime(); //kernel->end;

unsigned long long dt = 0;
dt = (((unsigned long long)t_stop) - ((unsigned long long)t_start));
Expand Down

0 comments on commit 6a2d004

Please sign in to comment.