Video encoder frame #893

Merged (23 commits) on Nov 13, 2023

1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -141,6 +141,7 @@ pybind11_add_module(${TARGET_NAME}
src/pipeline/datatype/ImageManipConfigBindings.cpp
src/pipeline/datatype/ImgDetectionsBindings.cpp
src/pipeline/datatype/ImgFrameBindings.cpp
src/pipeline/datatype/EncodedFrameBindings.cpp
src/pipeline/datatype/IMUDataBindings.cpp
src/pipeline/datatype/NNDataBindings.cpp
src/pipeline/datatype/SpatialImgDetectionsBindings.cpp
69 changes: 69 additions & 0 deletions examples/VideoEncoder/rgb_encoding_encodedframe.py
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

import depthai as dai

def frametype2str(ft):
    if ft == dai.EncodedFrame.FrameType.I:
        return "I"
    elif ft == dai.EncodedFrame.FrameType.P:
        return "P"
    elif ft == dai.EncodedFrame.FrameType.B:
        return "B"

def compress(ls):
    curr = ls[0]
    count = 1
    res = []
    for i in range(1, len(ls)):
        if ls[i] == curr:
            count += 1
        else:
            res.append((count, curr))
            curr = ls[i]
            count = 1
    res.append((count, curr))
    return res


# Create pipeline
pipeline = dai.Pipeline()

# Define sources and output
camRgb = pipeline.create(dai.node.ColorCamera)
videoEnc = pipeline.create(dai.node.VideoEncoder)
xout = pipeline.create(dai.node.XLinkOut)

xout.setStreamName('h265')

# Properties
camRgb.setBoardSocket(dai.CameraBoardSocket.CAM_A)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
videoEnc.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

# Linking
camRgb.video.link(videoEnc.input)
videoEnc.out.link(xout.input)

frametypes = []
# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queue will be used to get the encoded data from the output defined above
    q = device.getOutputQueue(name="h265", maxSize=30, blocking=True)

    # The .h265 file is a raw stream file (not playable yet)
    with open('video.h265', 'wb') as videoFile:
        print("Press Ctrl+C to stop encoding...")
        try:
            while True:
                h265Packet = q.get()  # Blocking call, waits until new data has arrived
                frametypes.append(frametype2str(h265Packet.getFrameType()))
                h265Packet.getData().tofile(videoFile)  # Appends the packet data to the opened file
        except KeyboardInterrupt:
            # Keyboard interrupt (Ctrl + C) detected
            pass

    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using the command below:")
    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

    print(",".join([f"{c}{f}" for c, f in compress(frametypes)]))
3 changes: 3 additions & 0 deletions src/DatatypeBindings.cpp
@@ -13,6 +13,7 @@ void bind_featuretrackerconfig(pybind11::module& m, void* pCallstack);
void bind_imagemanipconfig(pybind11::module& m, void* pCallstack);
void bind_imgdetections(pybind11::module& m, void* pCallstack);
void bind_imgframe(pybind11::module& m, void* pCallstack);
void bind_encodedframe(pybind11::module& m, void* pCallstack);
void bind_imudata(pybind11::module& m, void* pCallstack);
void bind_nndata(pybind11::module& m, void* pCallstack);
void bind_spatialimgdetections(pybind11::module& m, void* pCallstack);
@@ -39,6 +40,7 @@ void DatatypeBindings::addToCallstack(std::deque<StackFunction>& callstack) {
callstack.push_front(bind_imagemanipconfig);
callstack.push_front(bind_imgdetections);
callstack.push_front(bind_imgframe);
callstack.push_front(bind_encodedframe);
callstack.push_front(bind_imudata);
callstack.push_front(bind_nndata);
callstack.push_front(bind_spatialimgdetections);
@@ -74,6 +76,7 @@ void DatatypeBindings::bind(pybind11::module& m, void* pCallstack){
datatypeEnum
.value("Buffer", DatatypeEnum::Buffer)
.value("ImgFrame", DatatypeEnum::ImgFrame)
.value("EncodedFrame", DatatypeEnum::EncodedFrame)
.value("NNData", DatatypeEnum::NNData)
.value("ImageManipConfig", DatatypeEnum::ImageManipConfig)
.value("CameraControl", DatatypeEnum::CameraControl)
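
The enum entry added above also makes the new datatype visible from Python; a trivial check, assuming the enum is exported as dai.DatatypeEnum (as the existing members suggest) and a build of this branch is installed:

import depthai as dai

print(dai.DatatypeEnum.EncodedFrame)  # new member registered alongside ImgFrame, NNData, etc.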
135 changes: 135 additions & 0 deletions src/pipeline/datatype/EncodedFrameBindings.cpp
@@ -0,0 +1,135 @@
#include "DatatypeBindings.hpp"
#include "depthai-shared/datatype/RawEncodedFrame.hpp"
#include "pipeline/CommonBindings.hpp"
#include <memory>
#include <unordered_map>

// depthai
#include "depthai/pipeline/datatype/EncodedFrame.hpp"

// pybind
#include <pybind11/chrono.h>
#include <pybind11/numpy.h>

void bind_encodedframe(pybind11::module &m, void *pCallstack) {

using namespace dai;

py::class_<RawEncodedFrame, RawBuffer, std::shared_ptr<RawEncodedFrame>>
rawEncodedFrame(m, "RawEncodedFrame", DOC(dai, RawEncodedFrame));
py::enum_<RawEncodedFrame::Profile> rawEncodedFrameProfile(rawEncodedFrame,
"Profile");
py::enum_<RawEncodedFrame::FrameType> rawEncodedFrameType(
rawEncodedFrame, "FrameType", DOC(dai, RawEncodedFrame, FrameType));
py::class_<EncodedFrame, Buffer, std::shared_ptr<EncodedFrame>> encodedFrame(
m, "EncodedFrame", DOC(dai, EncodedFrame));

///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
// Call the rest of the type defines, then perform the actual bindings
Callstack *callstack = (Callstack *)pCallstack;
auto cb = callstack->top();
callstack->pop();
cb(m, pCallstack);
// Actual bindings
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////

// Metadata / raw

rawEncodedFrame.def(py::init<>())
.def_readwrite("quality", &RawEncodedFrame::quality)
.def_readwrite("bitrate", &RawEncodedFrame::bitrate)
.def_readwrite("profile", &RawEncodedFrame::profile)
.def_readwrite("lossless", &RawEncodedFrame::lossless)
.def_readwrite("type", &RawEncodedFrame::type)
.def_readwrite("instanceNum", &RawEncodedFrame::instanceNum)
.def_readwrite("sequenceNum", &RawEncodedFrame::sequenceNum)
.def_property(
"ts",
[](const RawEncodedFrame &o) {
double ts = o.ts.sec + o.ts.nsec / 1000000000.0;
return ts;
},
[](RawEncodedFrame &o, double ts) {
o.ts.sec = ts;
o.ts.nsec = (ts - o.ts.sec) * 1000000000.0;
})
.def_property(
"tsDevice",
[](const RawEncodedFrame &o) {
double ts = o.tsDevice.sec + o.tsDevice.nsec / 1000000000.0;
return ts;
},
[](RawEncodedFrame &o, double ts) {
o.tsDevice.sec = ts;
o.tsDevice.nsec = (ts - o.tsDevice.sec) * 1000000000.0;
});

rawEncodedFrameProfile.value("JPEG", EncodedFrame::Profile::JPEG)
.value("AVC", EncodedFrame::Profile::AVC)
.value("HEVC", EncodedFrame::Profile::HEVC);

rawEncodedFrameType.value("I", EncodedFrame::FrameType::I)
.value("P", EncodedFrame::FrameType::P)
.value("B", EncodedFrame::FrameType::B)
.value("Unknown", EncodedFrame::FrameType::Unknown);

// Message
encodedFrame
.def(py::init<>())
// getters
.def("getTimestamp",
py::overload_cast<>(&EncodedFrame::getTimestamp, py::const_),
DOC(dai, EncodedFrame, getTimestamp))
.def("getTimestampDevice",
py::overload_cast<>(&EncodedFrame::getTimestampDevice, py::const_),
DOC(dai, EncodedFrame, getTimestampDevice))
.def("getInstanceNum", &EncodedFrame::getInstanceNum,
DOC(dai, EncodedFrame, getInstanceNum))
.def("getSequenceNum", &EncodedFrame::getSequenceNum,
DOC(dai, EncodedFrame, getSequenceNum))
.def("getExposureTime", &EncodedFrame::getExposureTime,
DOC(dai, EncodedFrame, getExposureTime))
.def("getSensitivity", &EncodedFrame::getSensitivity,
DOC(dai, EncodedFrame, getSensitivity))
.def("getColorTemperature", &EncodedFrame::getColorTemperature,
DOC(dai, EncodedFrame, getColorTemperature))
.def("getLensPosition", &EncodedFrame::getLensPosition,
DOC(dai, EncodedFrame, getLensPosition))
.def("getQuality", &EncodedFrame::getQuality,
DOC(dai, EncodedFrame, getQuality))
.def("getBitrate", &EncodedFrame::getBitrate,
DOC(dai, EncodedFrame, getBitrate))
.def("getFrameType", &EncodedFrame::getFrameType,
DOC(dai, EncodedFrame, getFrameType))
.def("getLossless", &EncodedFrame::getLossless,
DOC(dai, EncodedFrame, getLossless))
.def("getProfile", &EncodedFrame::getProfile,
DOC(dai, EncodedFrame, getProfile))

// setters
.def("setTimestamp", &EncodedFrame::setTimestamp,
DOC(dai, EncodedFrame, setTimestamp))
.def("setTimestampDevice", &EncodedFrame::setTimestampDevice,
DOC(dai, EncodedFrame, setTimestampDevice))
.def("setSequenceNum", &EncodedFrame::setSequenceNum,
DOC(dai, EncodedFrame, getSequenceNum))
.def("setQuality", &EncodedFrame::setQuality,
DOC(dai, EncodedFrame, getQuality))
.def("setBitrate", &EncodedFrame::setBitrate,
DOC(dai, EncodedFrame, getBitrate))
.def("setFrameType", &EncodedFrame::setFrameType,
DOC(dai, EncodedFrame, getFrameType))
.def("setLossless", &EncodedFrame::setLossless,
DOC(dai, EncodedFrame, getLossless))
.def("setProfile", &EncodedFrame::setProfile,
DOC(dai, EncodedFrame, getProfile));
// Add aliases dai.EncodedFrame.FrameType and dai.EncodedFrame.Profile
m.attr("EncodedFrame").attr("FrameType") =
m.attr("RawEncodedFrame").attr("FrameType");
m.attr("EncodedFrame").attr("Profile") =
m.attr("RawEncodedFrame").attr("Profile");
}
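
The two m.attr(...) assignments at the end make the enums bound on RawEncodedFrame reachable through EncodedFrame as well. A small sketch of what that looks like from Python, assuming a wheel built from this branch:

import depthai as dai

# Both spellings resolve to the same pybind11 enum objects, so either works.
assert dai.EncodedFrame.FrameType is dai.RawEncodedFrame.FrameType
assert dai.EncodedFrame.Profile is dai.RawEncodedFrame.Profile
print(list(dai.EncodedFrame.FrameType.__members__))  # ['I', 'P', 'B', 'Unknown']
print(list(dai.EncodedFrame.Profile.__members__))    # ['JPEG', 'AVC', 'HEVC']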
1 change: 1 addition & 0 deletions src/pipeline/node/VideoEncoderBindings.cpp
@@ -59,6 +59,7 @@ void bind_videoencoder(pybind11::module& m, void* pCallstack){
videoEncoder
.def_readonly("input", &VideoEncoder::input, DOC(dai, node, VideoEncoder, input), DOC(dai, node, VideoEncoder, input))
.def_readonly("bitstream", &VideoEncoder::bitstream, DOC(dai, node, VideoEncoder, bitstream), DOC(dai, node, VideoEncoder, bitstream))
.def_readonly("out", &VideoEncoder::out, DOC(dai, node, VideoEncoder, out), DOC(dai, node, VideoEncoder, out))
.def("setDefaultProfilePreset", static_cast<void(VideoEncoder::*)(float, VideoEncoderProperties::Profile)>(&VideoEncoder::setDefaultProfilePreset), py::arg("fps"), py::arg("profile"), DOC(dai, node, VideoEncoder, setDefaultProfilePreset))
.def("setDefaultProfilePreset", [](VideoEncoder& v, int width, int height, float fps, VideoEncoderProperties::Profile profile){
PyErr_WarnEx(PyExc_DeprecationWarning, "Input width/height no longer needed, automatically determined from first frame", 1);
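
With the out output registered above, VideoEncoder exposes two encoded-data outputs: the existing bitstream (ImgFrame packets, as used by the older examples) and the new out (EncodedFrame packets, as used by rgb_encoding_encodedframe.py in this PR). The sketch below only shows how the linking differs; node and stream names are illustrative, and whether both outputs can be consumed at the same time is not verified here.

import depthai as dai

pipeline = dai.Pipeline()
camRgb = pipeline.create(dai.node.ColorCamera)
videoEnc = pipeline.create(dai.node.VideoEncoder)
videoEnc.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)
camRgb.video.link(videoEnc.input)

xoutImg = pipeline.create(dai.node.XLinkOut)   # legacy path: ImgFrame packets
xoutImg.setStreamName("bitstream")
videoEnc.bitstream.link(xoutImg.input)

xoutEnc = pipeline.create(dai.node.XLinkOut)   # new path: EncodedFrame packets
xoutEnc.setStreamName("encoded")
videoEnc.out.link(xoutEnc.input)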