Skip to content

Commit 4c04f2f

Browse files
committed
Adding cuda stream sync after NPP calls
1 parent 0b9db50 commit 4c04f2f

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

PyNvCodec/TC/src/TasksColorCvt.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,11 +486,15 @@ struct rbg8_swapchannel final : public NppConvertSurface_Impl {
486486
};
487487
} // namespace VPF
488488

489+
auto const cuda_stream_sync = [](void *stream) {
490+
cuStreamSynchronize((CUstream)stream);
491+
};
492+
489493
ConvertSurface::ConvertSurface(uint32_t width, uint32_t height,
490494
Pixel_Format inFormat, Pixel_Format outFormat,
491495
CUcontext ctx, CUstream str)
492496
: Task("NppConvertSurface", ConvertSurface::numInputs,
493-
ConvertSurface::numOutputs) {
497+
ConvertSurface::numOutputs, cuda_stream_sync, (void *)str) {
494498
if (NV12 == inFormat && YUV420 == outFormat) {
495499
pImpl = new nv12_yuv420(width, height, ctx, str);
496500
} else if (YUV420 == inFormat && NV12 == outFormat) {

SampleDemuxDecode.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,18 @@ def decode(gpuID, encFilePath, decFilePath):
5454
packet = np.ndarray(shape=(0), dtype=np.uint8)
5555
frameSize = int(nvDmx.Width() * nvDmx.Height() * 3 / 2)
5656
rawFrame = np.ndarray(shape=(frameSize), dtype=np.uint8)
57+
58+
# Determine colorspace conversion parameters.
59+
# Some video streams don't specify these parameters, so fall back to
60+
# the most widespread defaults: BT.601 color space and MPEG color range.
61+
cspace, crange = nvDmx.ColorSpace(), nvDmx.ColorRange()
62+
if nvc.ColorSpace.UNSPEC == cspace:
63+
cspace = nvc.ColorSpace.BT_601
64+
if nvc.ColorRange.UDEF == crange:
65+
crange = nvc.ColorRange.MPEG
66+
cc_ctx = nvc.ColorspaceConversionContext(cspace, crange)
67+
print('Color space: ', str(cspace))
68+
print('Color range: ', str(crange))
5769

5870
while True:
5971
# Demuxer has a sync design: it returns a packet every time it's called.
@@ -66,7 +78,7 @@ def decode(gpuID, encFilePath, decFilePath):
6678
# decoded surface every time the decoding function is called.
6779
surface_nv12 = nvDec.DecodeSurfaceFromPacket(packet)
6880
if not surface_nv12.Empty():
69-
surface_yuv420 = nvCvt.Execute(surface_nv12)
81+
surface_yuv420 = nvCvt.Execute(surface_nv12, cc_ctx)
7082
if surface_yuv420.Empty():
7183
break
7284
if not nvDwn.DownloadSingleSurface(surface_yuv420, rawFrame):
@@ -79,7 +91,7 @@ def decode(gpuID, encFilePath, decFilePath):
7991
surface_nv12 = nvDec.FlushSingleSurface()
8092
if surface_nv12.Empty():
8193
break
82-
surface_yuv420 = nvCvt.Execute(surface_nv12)
94+
surface_yuv420 = nvCvt.Execute(surface_nv12, cc_ctx)
8395
if surface_yuv420.Empty():
8496
break
8597
if not nvDwn.DownloadSingleSurface(surface_yuv420, rawFrame):
@@ -100,4 +112,4 @@ def decode(gpuID, encFilePath, decFilePath):
100112
encFilePath = sys.argv[2]
101113
decFilePath = sys.argv[3]
102114

103-
decode(gpuID, encFilePath, decFilePath)
115+
decode(gpuID, encFilePath, decFilePath)

0 commit comments

Comments
 (0)