Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nvtx range and thread naming #4064

Merged
merged 2 commits into from
Dec 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ else()
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
endif()
if (BUILD_PROFILER)
add_definitions(-DWITH_PROFILER)
add_definitions(-DOF_ENABLE_PROFILER)
endif()

enable_testing()
Expand Down
6 changes: 6 additions & 0 deletions oneflow/core/job/oneflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ limitations under the License.
#include "oneflow/core/vm/oneflow_vm.h"
#include "oneflow/core/graph/plan_task_graph.h"
#include "oneflow/core/graph/boxing/collective_boxing_util.h"
#include "oneflow/core/profiler/profiler.h"

namespace std {

Expand Down Expand Up @@ -988,12 +989,17 @@ Maybe<void> CompileAndMergePlanOnMaster(const PbRpf<Job>& conf_jobs, Plan* plan)
} // namespace

Maybe<void> Oneflow::Init(const oneflow::JobSet& job_set) {
OF_PROFILER_RANGE_GUARD("Oneflow::Init");
// Runtime
OF_PROFILER_RANGE_PUSH("CompileAndMergePlanOnMaster");
JUST(CompileAndMergePlanOnMaster(job_set.job(), &plan_));
OF_PROFILER_RANGE_POP(); // CompileAndMergePlanOnMaster
if (Global<MachineCtx>::Get()->IsThisMachineMaster()) {
runtime_buffers_scope_.reset(new RuntimeBuffersScope(plan_));
}
OF_PROFILER_RANGE_PUSH("new Runtime");
runtime_.reset(new Runtime(plan_, GetMaxVal<size_t>(), false));
OF_PROFILER_RANGE_POP(); // new Runtime
return Maybe<void>::Ok();
}

Expand Down
12 changes: 3 additions & 9 deletions oneflow/core/kernel/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ limitations under the License.
*/
#include "oneflow/core/kernel/kernel.h"
#include "oneflow/core/kernel/kernel_helper.h"
#include "oneflow/core/common/cached_caller.h"
#include "oneflow/core/kernel/runtime_blob_shape_infer_helper.h"
#if defined(WITH_PROFILER)
#include "oneflow/core/profiler/profiler.h"
#include "oneflow/core/profiler/kernel.h"
#endif

namespace oneflow {

Expand Down Expand Up @@ -100,13 +98,9 @@ void Kernel::Forward(const KernelCtx& ctx,
ForwardHeader(ctx, BnInOp2Blob);
if (IsAllBlobEmpty(op_attribute().output_bns(), BnInOp2Blob) && IsStateless()) { return; }
SetOutputBlobProducerComputeAccessChecker(BnInOp2Blob);
#if defined(WITH_PROFILER)
profiler::TraceKernelForwardDataContentStart(this, ctx, BnInOp2Blob);
#endif
OF_PROFILER_ONLY_CODE(profiler::TraceKernelForwardDataContentStart(this, ctx, BnInOp2Blob));
ForwardDataContent(ctx, BnInOp2Blob);
#if defined(WITH_PROFILER)
profiler::TraceKernelForwardDataContentEnd(this, ctx, BnInOp2Blob);
#endif
OF_PROFILER_ONLY_CODE(profiler::TraceKernelForwardDataContentEnd(this, ctx, BnInOp2Blob));
SetOutputBlobConsumerAccessChecker(BnInOp2Blob);
}

Expand Down
52 changes: 52 additions & 0 deletions oneflow/core/profiler/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ limitations under the License.
*/

#include "oneflow/core/profiler/profiler.h"
#ifdef OF_ENABLE_PROFILER
#include <nvtx3/nvToolsExt.h>
#include <sys/syscall.h>
#endif // OF_ENABLE_PROFILER

namespace oneflow {

Expand All @@ -41,6 +45,54 @@ void ParseBoolFlagFromEnv(const std::string& env_var, bool* flag) {
*flag = (env_p != nullptr && StringToBool(env_p));
}

void NameThisHostThread(const std::string& name) {
#ifdef OF_ENABLE_PROFILER
nvtxNameOsThreadA(syscall(SYS_gettid), name.c_str());
#endif // OF_ENABLE_PROFILER
}

void RangePush(const std::string& name) {
#ifdef OF_ENABLE_PROFILER
nvtxRangePushA(name.c_str());
#endif // OF_ENABLE_PROFILER
}

void RangePop() {
#ifdef OF_ENABLE_PROFILER
nvtxRangePop();
#endif // OF_ENABLE_PROFILER
}

#ifdef OF_ENABLE_PROFILER

class RangeGuardCtx {
public:
OF_DISALLOW_COPY_AND_MOVE(RangeGuardCtx);
explicit RangeGuardCtx(nvtxRangeId_t range_id) : range_id_(range_id) {}
~RangeGuardCtx() = default;

nvtxRangeId_t range_id() const { return range_id_; }

private:
nvtxRangeId_t range_id_;
};
#else
class RangeGuardCtx {};
#endif // OF_ENABLE_PROFILER

RangeGuard::RangeGuard(const std::string& name) {
#ifdef OF_ENABLE_PROFILER
nvtxRangeId_t range_id = nvtxRangeStartA(name.c_str());
ctx_.reset(new RangeGuardCtx(range_id));
#endif // OF_ENABLE_PROFILER
}

RangeGuard::~RangeGuard() {
#ifdef OF_ENABLE_PROFILER
nvtxRangeEnd(ctx_->range_id());
#endif // OF_ENABLE_PROFILER
}

} // namespace profiler

} // namespace oneflow
33 changes: 33 additions & 0 deletions oneflow/core/profiler/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,39 @@ namespace profiler {

void ParseBoolFlagFromEnv(const std::string& env_var, bool* flag);

void NameThisHostThread(const std::string& name);

void RangePush(const std::string& name);

void RangePop();

class RangeGuardCtx;

class RangeGuard final {
public:
OF_DISALLOW_COPY_AND_MOVE(RangeGuard);
explicit RangeGuard(const std::string& name);
~RangeGuard();

private:
std::shared_ptr<RangeGuardCtx> ctx_;
};

#ifdef OF_ENABLE_PROFILER
#define OF_PROFILER_NAME_THIS_HOST_THREAD(name) ::oneflow::profiler::NameThisHostThread(name)
#define OF_PROFILER_ONLY_CODE(...) __VA_ARGS__
#define OF_PROFILER_RANGE_PUSH(name) ::oneflow::profiler::RangePush(name)
#define OF_PROFILER_RANGE_POP() ::oneflow::profiler::RangePop()
#define OF_PROFILER_RANGE_GUARD(name) \
::oneflow::profiler::RangeGuard OF_PP_CAT(_of_profiler_range_guard_, __COUNTER__)(name)
#else
#define OF_PROFILER_ONLY_CODE(...)
#define OF_PROFILER_RANGE_PUSH(name)
#define OF_PROFILER_RANGE_POP()
#define OF_PROFILER_RANGE_GUARD(name)
#define OF_PROFILER_NAME_THIS_HOST_THREAD(name)
#endif

} // namespace profiler

} // namespace oneflow
Expand Down
6 changes: 4 additions & 2 deletions oneflow/core/thread/cpu_thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/thread/cpu_thread.h"
#include "oneflow/core/profiler/profiler.h"

namespace oneflow {

CpuThread::CpuThread(int64_t thrd_id) {
set_thrd_id(thrd_id);
mut_actor_thread() = std::thread([this]() {
mut_actor_thread() = std::thread([this, thrd_id]() {
OF_PROFILER_NAME_THIS_HOST_THREAD("CPU Actor : (" + std::to_string(thrd_id) + ")");
ThreadCtx ctx;
#ifdef WITH_CUDA
ctx.cb_event_chan = nullptr;
#endif
#endif // WITH_CUDA
PollMsgChannel(ctx);
});
}
Expand Down
9 changes: 7 additions & 2 deletions oneflow/core/thread/gpu_thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,26 @@ limitations under the License.
*/
#include "oneflow/core/thread/gpu_thread.h"
#include "oneflow/core/device/cuda_stream_handle.h"
#include "oneflow/core/profiler/profiler.h"

namespace oneflow {

#ifdef WITH_CUDA

GpuThread::GpuThread(int64_t thrd_id, int64_t dev_id) {
set_thrd_id(thrd_id);
mut_actor_thread() = std::thread([this, dev_id]() {
mut_actor_thread() = std::thread([this, dev_id, thrd_id]() {
OF_PROFILER_NAME_THIS_HOST_THREAD("GPU " + std::to_string(dev_id) + " Actor : ("
+ std::to_string(thrd_id) + ")");
OF_CUDA_CHECK(cudaSetDevice(dev_id));
ThreadCtx ctx;
ctx.g_cuda_stream.reset(new CudaStreamHandle(&cb_event_chan_));
ctx.cb_event_chan = &cb_event_chan_;
PollMsgChannel(ctx);
});
cb_event_poller_ = std::thread([this, dev_id]() {
cb_event_poller_ = std::thread([this, dev_id, thrd_id]() {
OF_PROFILER_NAME_THIS_HOST_THREAD("GPU " + std::to_string(dev_id) + " Poller : ("
+ std::to_string(thrd_id) + ")");
OF_CUDA_CHECK(cudaSetDevice(dev_id));
CudaCBEvent cb_event;
while (cb_event_chan_.Receive(&cb_event) == kChannelStatusSuccess) {
Expand Down