Skip to content

Commit c303069

Browse files
committed
Adding NVTX support
1 parent edb7d8f commit c303069

File tree

4 files changed

+44
-0
lines changed

4 files changed

+44
-0
lines changed

PyNvCodec/TC/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ project(TC)
2020
enable_language(CUDA)
2121
add_subdirectory(TC_CORE)
2222

23+
# Opt-in NVTX profiling markers: cache option that is FALSE unless the user
# overrides it at configure time.
set(USE_NVTX FALSE CACHE BOOL "Use NVTX for profiling")
24+
if(USE_NVTX)
25+
# Pass -DUSE_NVTX to the compiler so the sources can test the symbol.
add_definitions(-DUSE_NVTX)
26+
endif()
27+
2328
set(VIDEO_CODEC_SDK_DIR "" CACHE PATH "Path to Nvidia Video Codec SDK")
2429
set(FFMPEG_DIR "" CACHE PATH "Path to FFMpeg")
2530

PyNvCodec/TC/inc/Tasks.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,31 @@ extern "C" {
2222
#include <libavutil/frame.h>
2323
}
2424

25+
// Optional NVTX instrumentation helpers.
// With USE_NVTX defined, NVTX_PUSH(FNAME) calls nvtxRangePush(FNAME) and
// NVTX_POP calls nvtxRangePop(); otherwise both expand to nothing.
// NOTE(review): the USE_NVTX expansions already end in ';', so call sites
// compile without a trailing semicolon, but the macros are then not safe as
// the sole statement of an unbraced if/else.
#ifdef USE_NVTX
26+
#include <nvtx3/nvToolsExt.h>
27+
#define NVTX_PUSH(FNAME) \
28+
do { \
29+
nvtxRangePush(FNAME); \
30+
} while (0);
31+
#define NVTX_POP \
32+
do { \
33+
nvtxRangePop(); \
34+
} while (0);
35+
// Without USE_NVTX the markers compile away entirely.
#else
36+
#define NVTX_PUSH(FNAME)
37+
#define NVTX_POP
38+
#endif
39+
2540
using namespace VPF;
2641

2742
// VPF stands for Video Processing Framework;
2843
namespace VPF {
44+
class DllExport NvtxMark {
45+
public:
46+
NvtxMark(const char *fname) { NVTX_PUSH(fname) }
47+
~NvtxMark() { NVTX_POP }
48+
};
49+
2950
class DllExport NvencEncodeFrame final : public Task {
3051
public:
3152
NvencEncodeFrame() = delete;

PyNvCodec/TC/src/Tasks.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ NvencEncodeFrame::NvencEncodeFrame(CUstream cuStream, CUcontext cuContext,
137137
NvencEncodeFrame::~NvencEncodeFrame() { delete pImpl; };
138138

139139
TaskExecStatus NvencEncodeFrame::Execute() {
140+
NvtxMark tick(__FUNCTION__);
140141
SetOutput(nullptr, 0U);
141142

142143
try {
@@ -279,6 +280,7 @@ NvdecDecodeFrame::~NvdecDecodeFrame() {
279280
}
280281

281282
TaskExecStatus NvdecDecodeFrame::Execute() {
283+
NvtxMark tick(__FUNCTION__);
282284
ClearOutputs();
283285

284286
auto &decoder = pImpl->nvDecoder;
@@ -414,6 +416,7 @@ CudaUploadFrame::CudaUploadFrame(CUstream cuStream, CUcontext cuContext,
414416
CudaUploadFrame::~CudaUploadFrame() { delete pImpl; }
415417

416418
TaskExecStatus CudaUploadFrame::Execute() {
419+
NvtxMark tick(__FUNCTION__);
417420
if (!GetInput()) {
418421
return TASK_EXEC_FAIL;
419422
}
@@ -513,6 +516,7 @@ CudaDownloadSurface::CudaDownloadSurface(CUstream cuStream, CUcontext cuContext,
513516
CudaDownloadSurface::~CudaDownloadSurface() { delete pImpl; }
514517

515518
TaskExecStatus CudaDownloadSurface::Execute() {
519+
NvtxMark tick(__FUNCTION__);
516520

517521
if (!GetInput()) {
518522
return TASK_EXEC_FAIL;
@@ -612,6 +616,7 @@ DemuxFrame::~DemuxFrame() { delete pImpl; }
612616
void DemuxFrame::Flush() { pImpl->demuxer.Flush(); }
613617

614618
TaskExecStatus DemuxFrame::Execute() {
619+
NvtxMark tick(__FUNCTION__);
615620
ClearOutputs();
616621

617622
uint8_t *pVideo = nullptr;
@@ -800,6 +805,7 @@ MuxFrame::~MuxFrame() {
800805
}
801806

802807
TaskExecStatus MuxFrame::Execute() {
808+
NvtxMark tick(__FUNCTION__);
803809
auto elementaryVideo = (Buffer *)GetInput(0U);
804810
auto muxingParamsBuffer = (Buffer *)GetInput(1U);
805811

@@ -894,6 +900,7 @@ struct NppResizeSurfacePacked3C_Impl final : ResizeSurface_Impl {
894900
~NppResizeSurfacePacked3C_Impl() { delete pSurface; }
895901

896902
TaskExecStatus Execute(Surface &source) {
903+
NvtxMark tick(__FUNCTION__);
897904

898905
if (pSurface->PixelFormat() != source.PixelFormat()) {
899906
return TaskExecStatus::TASK_EXEC_FAIL;
@@ -950,6 +957,7 @@ struct NppResizeSurfacePlanar420_Impl final : ResizeSurface_Impl {
950957
~NppResizeSurfacePlanar420_Impl() { delete pSurface; }
951958

952959
TaskExecStatus Execute(Surface &source) {
960+
NvtxMark tick(__FUNCTION__);
953961

954962
if (pSurface->PixelFormat() != source.PixelFormat()) {
955963
cerr << "Actual pixel format is " << source.PixelFormat() << endl;
@@ -1016,6 +1024,7 @@ ResizeSurface::ResizeSurface(uint32_t width, uint32_t height,
10161024
ResizeSurface::~ResizeSurface() { delete pImpl; }
10171025

10181026
TaskExecStatus ResizeSurface::Execute() {
1027+
NvtxMark tick(__FUNCTION__);
10191028
ClearOutputs();
10201029

10211030
auto pInputSurface = (Surface *)GetInput();

PyNvCodec/TC/src/TasksColorCvt.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ struct nv12_bgr final : public NppConvertSurface_Impl {
5050
~nv12_bgr() { delete pSurface; }
5151

5252
Token *Execute(Token *pInputNV12) override {
53+
NvtxMark tick(__FUNCTION__);
5354
if (!pInputNV12) {
5455
return nullptr;
5556
}
@@ -85,6 +86,7 @@ struct nv12_rgb final : public NppConvertSurface_Impl {
8586
~nv12_rgb() { delete pSurface; }
8687

8788
Token *Execute(Token *pInputNV12) override {
89+
NvtxMark tick(__FUNCTION__);
8890
if (!pInputNV12) {
8991
return nullptr;
9092
}
@@ -121,6 +123,7 @@ struct nv12_yuv420 final : public NppConvertSurface_Impl {
121123
~nv12_yuv420() { delete pSurface; }
122124

123125
Token *Execute(Token *pInputNV12) override {
126+
NvtxMark tick(__FUNCTION__);
124127
if (!pInputNV12) {
125128
return nullptr;
126129
}
@@ -163,6 +166,7 @@ struct yuv420_rgb final : public NppConvertSurface_Impl {
163166
~yuv420_rgb() { delete pSurface; }
164167

165168
Token *Execute(Token *pInputYUV420) override {
169+
NvtxMark tick(__FUNCTION__);
166170
if (!pInputYUV420) {
167171
return nullptr;
168172
}
@@ -202,6 +206,7 @@ struct bgr_ycbcr final : public NppConvertSurface_Impl {
202206
~bgr_ycbcr() { delete pSurface; }
203207

204208
Token *Execute(Token *pInput) override {
209+
NvtxMark tick(__FUNCTION__);
205210
auto pInputBGR = (SurfaceRGB *)pInput;
206211

207212
if (BGR != pInputBGR->PixelFormat()) {
@@ -248,6 +253,7 @@ struct rgb_yuv420 final : public NppConvertSurface_Impl {
248253
~rgb_yuv420() { delete pSurface; }
249254

250255
Token *Execute(Token *pInput) override {
256+
NvtxMark tick(__FUNCTION__);
251257
auto pInputRGB8 = (SurfaceRGB *)pInput;
252258

253259
if (RGB != pInputRGB8->PixelFormat()) {
@@ -288,6 +294,7 @@ struct yuv420_nv12 final : public NppConvertSurface_Impl {
288294
~yuv420_nv12() { delete pSurface; }
289295

290296
Token *Execute(Token *pInputYUV420) override {
297+
NvtxMark tick(__FUNCTION__);
291298
if (!pInputYUV420) {
292299
return nullptr;
293300
}
@@ -331,6 +338,7 @@ struct rgb8_deinterleave final : public NppConvertSurface_Impl {
331338
~rgb8_deinterleave() { delete pSurface; }
332339

333340
Token *Execute(Token *pInput) override {
341+
NvtxMark tick(__FUNCTION__);
334342
auto pInputRGB8 = (SurfaceRGB *)pInput;
335343

336344
if (RGB != pInputRGB8->PixelFormat()) {
@@ -374,6 +382,7 @@ struct rbg8_swapchannel final : public NppConvertSurface_Impl {
374382
~rbg8_swapchannel() { delete pSurface; }
375383

376384
Token *Execute(Token *pInput) override {
385+
NvtxMark tick(__FUNCTION__);
377386
if (!pInput) {
378387
return nullptr;
379388
}

0 commit comments

Comments
 (0)