diff --git a/Assets/MediaPipe/Examples/Resources/face_detection_desktop_live.txt b/Assets/MediaPipe/Examples/Resources/face_detection_desktop_live.txt
index d3d6e995d..a6a740e3b 100644
--- a/Assets/MediaPipe/Examples/Resources/face_detection_desktop_live.txt
+++ b/Assets/MediaPipe/Examples/Resources/face_detection_desktop_live.txt
@@ -40,10 +40,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Subgraph that detects faces.
 node {
   calculator: "FaceDetectionFrontCpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   output_stream: "DETECTIONS:face_detections"
 }
diff --git a/Assets/MediaPipe/Examples/Resources/face_detection_mobile_gpu.txt b/Assets/MediaPipe/Examples/Resources/face_detection_mobile_gpu.txt
index 255c7983b..51aed3d14 100644
--- a/Assets/MediaPipe/Examples/Resources/face_detection_mobile_gpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/face_detection_mobile_gpu.txt
@@ -40,10 +40,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Subgraph that detects faces.
 node {
   calculator: "FaceDetectionFrontGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   output_stream: "DETECTIONS:face_detections"
 }
diff --git a/Assets/MediaPipe/Examples/Resources/face_mesh_desktop_live.txt b/Assets/MediaPipe/Examples/Resources/face_mesh_desktop_live.txt
index f9fbd1184..bf519632a 100644
--- a/Assets/MediaPipe/Examples/Resources/face_mesh_desktop_live.txt
+++ b/Assets/MediaPipe/Examples/Resources/face_mesh_desktop_live.txt
@@ -47,10 +47,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Subgraph that detects faces and corresponding landmarks.
 node {
   calculator: "FaceLandmarkFrontCpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_side_packet: "NUM_FACES:num_faces"
   output_stream: "LANDMARKS:multi_face_landmarks"
   output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
diff --git a/Assets/MediaPipe/Examples/Resources/face_mesh_mobile.txt b/Assets/MediaPipe/Examples/Resources/face_mesh_mobile.txt
index 821deefab..5e4632be2 100644
--- a/Assets/MediaPipe/Examples/Resources/face_mesh_mobile.txt
+++ b/Assets/MediaPipe/Examples/Resources/face_mesh_mobile.txt
@@ -50,10 +50,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Subgraph that detects faces and corresponding landmarks.
 node {
   calculator: "FaceLandmarkFrontGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_side_packet: "NUM_FACES:num_faces"
   output_stream: "LANDMARKS:multi_face_landmarks"
   output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
diff --git a/Assets/MediaPipe/Examples/Resources/hair_segmentation_mobile_gpu.txt b/Assets/MediaPipe/Examples/Resources/hair_segmentation_mobile_gpu.txt
index c34cc36a7..d1abfaf48 100644
--- a/Assets/MediaPipe/Examples/Resources/hair_segmentation_mobile_gpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/hair_segmentation_mobile_gpu.txt
@@ -56,6 +56,7 @@ node: {
     [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
       output_width: 512
       output_height: 512
+      rotation_mode: 3
     }
   }
 }
diff --git a/Assets/MediaPipe/Examples/Resources/hand_tracking_desktop_live.txt b/Assets/MediaPipe/Examples/Resources/hand_tracking_desktop_live.txt
index 981a38c6e..37397847a 100644
--- a/Assets/MediaPipe/Examples/Resources/hand_tracking_desktop_live.txt
+++ b/Assets/MediaPipe/Examples/Resources/hand_tracking_desktop_live.txt
@@ -34,10 +34,21 @@ output_stream: "hand_rects_from_palm_detections"
 output_stream: "hand_landmarks_presence"
 output_stream: "palm_detections_presence"
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Detects/tracks hand landmarks.
 node {
   calculator: "HandLandmarkTrackingCpu"
-  input_stream: "IMAGE:input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_side_packet: "NUM_HANDS:num_hands"
   output_stream: "LANDMARKS:hand_landmarks"
   output_stream: "HANDEDNESS:handedness"
diff --git a/Assets/MediaPipe/Examples/Resources/hand_tracking_mobile.txt b/Assets/MediaPipe/Examples/Resources/hand_tracking_mobile.txt
index 623de8f5d..d7969a78c 100644
--- a/Assets/MediaPipe/Examples/Resources/hand_tracking_mobile.txt
+++ b/Assets/MediaPipe/Examples/Resources/hand_tracking_mobile.txt
@@ -54,10 +54,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Detects/tracks hand landmarks.
 node {
   calculator: "HandLandmarkTrackingGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_side_packet: "NUM_HANDS:num_hands"
   output_stream: "LANDMARKS:hand_landmarks"
   output_stream: "HANDEDNESS:handedness"
diff --git a/Assets/MediaPipe/Examples/Resources/iris_tracking_cpu.txt b/Assets/MediaPipe/Examples/Resources/iris_tracking_cpu.txt
index 9eb2f7bcf..9ab94e3b0 100644
--- a/Assets/MediaPipe/Examples/Resources/iris_tracking_cpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/iris_tracking_cpu.txt
@@ -38,10 +38,21 @@ node {
   }
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Detects faces and corresponding landmarks.
 node {
   calculator: "FaceLandmarkFrontCpu"
-  input_stream: "IMAGE:input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_side_packet: "NUM_FACES:num_faces"
   output_stream: "LANDMARKS:multi_face_landmarks"
   output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
@@ -107,7 +118,7 @@ node {
 # Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
 node {
   calculator: "IrisLandmarkLeftAndRightCpu"
-  input_stream: "IMAGE:input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
   input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
   output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
diff --git a/Assets/MediaPipe/Examples/Resources/iris_tracking_gpu.txt b/Assets/MediaPipe/Examples/Resources/iris_tracking_gpu.txt
index 5fd229385..3dd98c99e 100644
--- a/Assets/MediaPipe/Examples/Resources/iris_tracking_gpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/iris_tracking_gpu.txt
@@ -46,6 +46,17 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Defines how many faces to detect. Iris tracking currently only handles one
 # face (left and right eye), and therefore this should always be set to 1.
 node {
@@ -61,7 +72,7 @@ node {
 # Detects faces and corresponding landmarks.
 node {
   calculator: "FaceLandmarkFrontGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_side_packet: "NUM_FACES:num_faces"
   output_stream: "LANDMARKS:multi_face_landmarks"
   output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks"
@@ -127,7 +138,7 @@ node {
 # Detects iris landmarks, eye contour landmarks, and corresponding rect (ROI).
 node {
   calculator: "IrisLandmarkLeftAndRightGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   input_stream: "LEFT_EYE_BOUNDARY_LANDMARKS:left_eye_boundary_landmarks"
   input_stream: "RIGHT_EYE_BOUNDARY_LANDMARKS:right_eye_boundary_landmarks"
   output_stream: "LEFT_EYE_CONTOUR_LANDMARKS:left_eye_contour_landmarks"
diff --git a/Assets/MediaPipe/Examples/Resources/object_detection_desktop_live.txt b/Assets/MediaPipe/Examples/Resources/object_detection_desktop_live.txt
index 856dd9799..f630b77ba 100644
--- a/Assets/MediaPipe/Examples/Resources/object_detection_desktop_live.txt
+++ b/Assets/MediaPipe/Examples/Resources/object_detection_desktop_live.txt
@@ -56,6 +56,7 @@ node: {
     [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
       output_width: 320
       output_height: 320
+      rotation_mode: 3
     }
   }
 }
diff --git a/Assets/MediaPipe/Examples/Resources/object_detection_mobile_gpu.txt b/Assets/MediaPipe/Examples/Resources/object_detection_mobile_gpu.txt
index b4c1394d1..01c87c9ef 100644
--- a/Assets/MediaPipe/Examples/Resources/object_detection_mobile_gpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/object_detection_mobile_gpu.txt
@@ -57,6 +57,7 @@ node: {
     [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
       output_width: 320
       output_height: 320
+      rotation_mode: 3
     }
   }
 }
diff --git a/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_cpu.txt b/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_cpu.txt
index 308e608d3..acf76e728 100644
--- a/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_cpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_cpu.txt
@@ -42,10 +42,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE:throttled_input_video"
+  output_stream: "IMAGE:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Subgraph that detects poses and corresponding landmarks.
 node {
   calculator: "PoseLandmarkUpperBodyCpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   output_stream: "LANDMARKS:pose_landmarks"
   output_stream: "DETECTION:pose_detection"
   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
@@ -54,7 +65,7 @@ node {
 # Calculates size of the image.
 node {
   calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   output_stream: "SIZE:image_size"
 }
diff --git a/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_gpu.txt b/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_gpu.txt
index e31518663..1215d3b86 100644
--- a/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_gpu.txt
+++ b/Assets/MediaPipe/Examples/Resources/upper_body_pose_tracking_gpu.txt
@@ -42,10 +42,21 @@ node {
   output_stream: "throttled_input_video"
 }
 
+node: {
+  calculator: "ImageTransformationCalculator"
+  input_stream: "IMAGE_GPU:throttled_input_video"
+  output_stream: "IMAGE_GPU:transformed_input_video"
+  node_options: {
+    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
+      rotation_mode: 3
+    }
+  }
+}
+
 # Subgraph that detects poses and corresponding landmarks.
 node {
   calculator: "PoseLandmarkUpperBodyGpu"
-  input_stream: "IMAGE:throttled_input_video"
+  input_stream: "IMAGE:transformed_input_video"
   output_stream: "LANDMARKS:pose_landmarks"
   output_stream: "DETECTION:pose_detection"
   output_stream: "ROI_FROM_LANDMARKS:roi_from_landmarks"
@@ -54,7 +65,7 @@ node {
 # Calculates size of the image.
 node {
   calculator: "ImagePropertiesCalculator"
-  input_stream: "IMAGE_GPU:throttled_input_video"
+  input_stream: "IMAGE_GPU:transformed_input_video"
   output_stream: "SIZE:image_size"
 }
diff --git a/Assets/MediaPipe/Examples/Scenes/DesktopCPU.unity b/Assets/MediaPipe/Examples/Scenes/DesktopCPU.unity
index 701a1fbf8..9d519b60f 100644
--- a/Assets/MediaPipe/Examples/Scenes/DesktopCPU.unity
+++ b/Assets/MediaPipe/Examples/Scenes/DesktopCPU.unity
@@ -824,6 +824,7 @@ GameObject:
   - component: {fileID: 496037461}
   - component: {fileID: 496037460}
   - component: {fileID: 496037459}
+  - component: {fileID: 496037464}
   m_Layer: 0
   m_Name: WebCamScreen
   m_TagString: Untagged
@@ -846,7 +847,7 @@ MonoBehaviour:
   DefaultWidth: 640
   DefaultHeight: 480
   FPS: 30
-  focalLengthPx: 2
+  DefaultFocalLengthPx: 2
 --- !u!64 &496037460
 MeshCollider:
   m_ObjectHideFlags: 0
@@ -922,6 +923,19 @@ Transform:
   m_Father: {fileID: 0}
   m_RootOrder: 2
   m_LocalEulerAnglesHint: {x: 90, y: 180, z: 0}
+--- !u!114 &496037464
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
+  m_GameObject: {fileID: 496037458}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: d5da564da19cb6b7d8e4f97f269edc5d, type: 3}
+  m_Name: 
+  m_EditorClassIdentifier: 
+  poolSize: 20
 --- !u!1 &624144232
 GameObject:
   m_ObjectHideFlags: 0
diff --git a/Assets/MediaPipe/Examples/Scripts/DemoGraph.cs b/Assets/MediaPipe/Examples/Scripts/DemoGraph.cs
index da5d99054..69e9af6b4 100644
--- a/Assets/MediaPipe/Examples/Scripts/DemoGraph.cs
+++ b/Assets/MediaPipe/Examples/Scripts/DemoGraph.cs
@@ -39,7 +39,8 @@ public Status PushInput(TextureFrame textureFrame) {
     ImageFrame imageFrame = null;
 
     if (!IsGpuEnabled()) {
-      imageFrame = CopyPixelsFrom(textureFrame);
+      imageFrame = new ImageFrame(
+        ImageFormat.Format.SRGBA, textureFrame.width, textureFrame.height, 4 * textureFrame.width, textureFrame.GetRawNativeByteArray());
       var packet = new ImageFramePacket(imageFrame, timestamp);
 
       return graph.AddPacketToInputStream(inputStream, packet);
@@ -53,16 +54,12 @@ public Status PushInput(TextureFrame textureFrame) {
      var glTextureBuffer = new GlTextureBuffer((UInt32)glTextureName, textureFrame.width, textureFrame.height,
                                                textureFrame.gpuBufferformat, textureFrame.OnRelease, glContext);
      var gpuBuffer = new GpuBuffer(glTextureBuffer);
-      var texture = gpuHelper.CreateSourceTexture(gpuBuffer);
-      var gpuFrame = texture.GetGpuBufferFrame();
-
-      Gl.Flush();
-      texture.Release();
 
      return graph.AddPacketToInputStream(inputStream, new GpuBufferPacket(gpuBuffer, timestamp));
     });
 #else
-    imageFrame = CopyPixelsFrom(textureFrame);
+    imageFrame = new ImageFrame(
+      ImageFormat.Format.SRGBA, textureFrame.width, textureFrame.height, 4 * textureFrame.width, textureFrame.GetRawNativeByteArray());
 
     return gpuHelper.RunInGlContext(() => {
       var texture = gpuHelper.CreateSourceTexture(imageFrame);
@@ -76,10 +73,6 @@ public Status PushInput(TextureFrame textureFrame) {
 #endif
   }
 
-  private ImageFrame CopyPixelsFrom(TextureFrame textureFrame) {
-    return ImageFrame.FromPixels32(textureFrame.GetPixels32(), textureFrame.width, textureFrame.height, true);
-  }
-
   public abstract void RenderOutput(WebCamScreenController screenController, TextureFrame textureFrame);
 
   public void Stop() {
diff --git a/Assets/MediaPipe/Examples/Scripts/ResourceManager/TextureFrame.cs b/Assets/MediaPipe/Examples/Scripts/ResourceManager/TextureFrame.cs
index 3450b6d23..c13ec141e 100644
--- a/Assets/MediaPipe/Examples/Scripts/ResourceManager/TextureFrame.cs
+++ b/Assets/MediaPipe/Examples/Scripts/ResourceManager/TextureFrame.cs
@@ -1,7 +1,9 @@
 using Mediapipe;
 using System;
 using System.Runtime.InteropServices;
+using Unity.Collections;
 using UnityEngine;
+using UnityEngine.Experimental.Rendering;
 
 public class TextureFrame {
   private Texture2D texture;
@@ -15,6 +17,14 @@ public int height {
     get { return texture.height; }
   }
 
+  public GraphicsFormat graphicsFormat {
+    get { return texture.graphicsFormat; }
+  }
+
+  public TextureFormat format {
+    get { return texture.format; }
+  }
+
   public TextureFrame(int width, int height) {
     this.texture = new Texture2D(width, height, TextureFormat.BGRA32, false);
     releaseCallbackHandle = GCHandle.Alloc((GlTextureBuffer.DeletionCallback)this.OnRelease, GCHandleType.Pinned);
   }
@@ -40,6 +50,11 @@ public Color32[] GetPixels32() {
     return texture.GetPixels32();
   }
 
+  // TODO: implement generic method
+  public NativeArray<byte> GetRawNativeByteArray() {
+    return texture.GetRawTextureData<byte>();
+  }
+
   public IntPtr GetNativeTexturePtr() {
     return texture.GetNativeTexturePtr();
   }
diff --git a/Assets/MediaPipe/SDK/Scripts/Format.cs b/Assets/MediaPipe/SDK/Scripts/Format.cs
index 5cc78d084..58c51c676 100644
--- a/Assets/MediaPipe/SDK/Scripts/Format.cs
+++ b/Assets/MediaPipe/SDK/Scripts/Format.cs
@@ -12,6 +12,7 @@ public class Format {
   /// In <paramref name="colors" />, pixels are laid out left to right, bottom to top,
   /// but in the returned array, left to right, top to bottom.
   ///
+  [Obsolete("FromPixels32 is deprecated, use Texture2D#GetRawNativeData")]
   public static NativeArray<byte> FromPixels32(Color32[] colors, int width, int height, bool isFlipped = false, Allocator allocator = Allocator.Temp) {
     var pixelData = new NativeArray<byte>(colors.Length * 4, allocator, NativeArrayOptions.UninitializedMemory);
diff --git a/Assets/MediaPipe/SDK/Scripts/Framework/Formats/ImageFrame.cs b/Assets/MediaPipe/SDK/Scripts/Framework/Formats/ImageFrame.cs
index 602f3004b..8435ce9eb 100644
--- a/Assets/MediaPipe/SDK/Scripts/Framework/Formats/ImageFrame.cs
+++ b/Assets/MediaPipe/SDK/Scripts/Framework/Formats/ImageFrame.cs
@@ -146,6 +146,7 @@ public Color32[] GetColor32s(bool isFlipped = false) {
     return Mediapipe.Format.FromBytePtr(MutablePixelData(), Format(), Width(), Height(), WidthStep(), isFlipped);
   }
 
+  [Obsolete("FromPixels32() is deprecated")]
   public static ImageFrame FromPixels32(Color32[] colors, int width, int height, bool isFlipped = false) {
     return new ImageFrame(ImageFormat.Format.SRGBA, width, height, 4 * width, Mediapipe.Format.FromPixels32(colors, width, height, isFlipped));
   }
diff --git a/C/mediapipe_api/BUILD b/C/mediapipe_api/BUILD
index ce61f4ce2..f85facd97 100644
--- a/C/mediapipe_api/BUILD
+++ b/C/mediapipe_api/BUILD
@@ -65,6 +65,7 @@ cc_library(
     deps = [
         "@com_google_mediapipe//mediapipe/calculators/core:pass_through_calculator",
         "@com_google_mediapipe//mediapipe/calculators/core:packet_presence_calculator",
+        "@com_google_mediapipe//mediapipe/calculators/image:image_transformation_calculator",
     ] + select({
         "@com_google_mediapipe//mediapipe/gpu:disable_gpu": [
            "@com_google_mediapipe//mediapipe/graphs/face_detection:desktop_live_calculators",
@@ -75,6 +76,7 @@ cc_library(
            "@com_google_mediapipe//mediapipe/graphs/object_detection:desktop_tflite_calculators",
        ],
        "//conditions:default": [
+           "@com_google_mediapipe//mediapipe/gpu:gl_scaler_calculator",
            "@com_google_mediapipe//mediapipe/graphs/face_detection:desktop_live_gpu_calculators",
            "@com_google_mediapipe//mediapipe/graphs/face_mesh:desktop_live_gpu_calculators",
            "@com_google_mediapipe//mediapipe/graphs/hair_segmentation:mobile_calculators",