You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hi,
I'm working on a project that does face recognition in real-time using ar glass(nreal light).
'shape_predictor_68_face_landmarks.dat' and 'dlib_face_recognition_resnet_model_v1.dat' are loaded successfully, but there seems to be a problem in the part where the operator of detector is called.
I tried two ways to use the camera texture in dlibdotnet.
First, I used nreal light's rgbcamera to get texture2d and converted it to Matrix type, but FrontalFaceDetector.Operator() doesn't seem to be applied.
Second, I used data of type byte[] and the result was same.
So my point is 1. I want to know if real-time face recognition is possible.
2. I wonder if data in byte[] format or Unity's texture2d format is available on dlibdotnet.
Any advice about this issue would be appreciated!
Environment
The ar glass model is nreal light and I am working in unity environment.
Also, android phone model is samsung galaxy s22.
Code:
using DlibDotNet;
using NRKernal;
using UnityEngine;
using System;
using UnityEngine.UI;
...
public class SpeakerRecognition : MonoBehaviour {
public RawImage captureImage;
public Button activeButton;
private bool isActivated = false;
private FrontalFaceDetector detector;
private ShapePredictor sp;
private DlibDotNet.Dnn.LossMetric net;
private List<Matrix<float>> knownFaces = new List<Matrix<float>>();
private List<string> knownNames = new List<string>();
private List<float> faceDiffs = new List<float>();
private NRRGBCamTexture camTexture;
void Start()
{
detector = Dlib.GetFrontalFaceDetector();
sp = ShapePredictor.Deserialize(Application.dataPath + "/Plugins/shape_predictor_68_face_landmarks.dat");
net = DlibDotNet.Dnn.LossMetric.Deserialize(Application.dataPath + "/Plugins/dlib_face_recognition_resnet_model_v1.dat");
LoadKnownFaces();
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
activeButton.onClick.AddListener(ToggleActivation);
}
private IEnumerator RecognizeSpeaker() {
// while camera is activated
if(isActivated){
// convert texture2d to matrix
int width = camTexture.Width;
int height = camTexture.Height;
Matrix<RgbPixel> img = new Matrix<RgbPixel>(height, width);
Color32[] pixels = camTexture.GetTexture().GetPixels32();
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
Color32 pixel = pixels[y * width + x];
RgbPixel rgbPixel = new RgbPixel(pixel.r, pixel.g, pixel.b);
img[y, x] = rgbPixel;
}
}
var faces = new List<Matrix<RgbPixel>>();
var shapes = new List<FullObjectDetection>();
foreach(var face in detector.Operator(img))
{
var shape = sp.Detect(img, face);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
shapes.Add(shape);
faces.Add(faceChip);
}
var faceDescriptors = net.Operator(faces);
for(int i = 0; i < faceDescriptors.Count; i++)
{
//
// DrawFaceLandmarks(shapes[i]);
//
debugLog.text = "descripter " + i;
faceDiffs.Clear();
for(int j = 0; j < knownFaces.Count; j++)
{
var diff = Dlib.Length(knownFaces[j] - faceDescriptors[i]);
faceDiffs.Add(diff);
}
var index = faceDiffs.IndexOf(minDiff);
var matchRate = 1 / (1 + minDiff);
if(matchRate > 0.5)
{
...
}
}
}
yield return null;
}
public void LoadKnownFaces() {
debugLog.text = "load known faces";
string relativePath = "NRSDK/Demos/OverTheGlass/Scripts/faces";
string absolutePath = Application.dataPath + "/" + relativePath;
foreach (var file in System.IO.Directory.GetFiles(absolutePath, "*.jpg"))
{
var img = Dlib.LoadImageAsMatrix<RgbPixel>(file);
var faces = detector.Operator(img);
if (faces.Any())
{
// Detect face
var shape = sp.Detect(img, faces[0]);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
// Get face descriptor
var faceDescriptor = net.Operator(faceChip);
// Add known face and name informations
knownFaces.Add(faceDescriptor.First());
knownNames.Add(System.IO.Path.GetFileNameWithoutExtension(file));
}
else
{
debugLog.text = "No face found in " + file;
}
}
lipDiffs = new List<double>(knownFaces.Count);
return;
}
public void ToggleActivation()
{
isActivated = !isActivated;
if (isActivated)
{
if(camTexture == null)
{
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
}
camTexture.Play();
captureImage.texture = camTexture.GetTexture();
StartCoroutine(RecognizeSpeaker());
}
else
{
camTexture?.Stop();
camTexture = null;
StopCoroutine(RecognizeSpeaker());
}
}
void OnDestroy()
{
camTexture?.Stop();
camTexture = null;
}
}
using System.Collections.Generic;
using DlibDotNet;
using NRKernal;
using UnityEngine;
using System;
using System.Linq;
using UnityEngine.UI;
using System.Collections;
using OpenCvSharp;
using System.Runtime.InteropServices;
public class SpeakerRecognition : MonoBehaviour {
public Text debugLog;
public RawImage captureImage;
public Text speakerName;
public Button activeButton;
public GameObject spherePrefab;
private bool isActivated = false;
private FrontalFaceDetector detector;
private ShapePredictor sp;
private DlibDotNet.Dnn.LossMetric net;
private List<Matrix<float>> knownFaces = new List<Matrix<float>>();
private List<string> knownNames = new List<string>();
private List<LipMovement> movements = new List<LipMovement>();
private (double, double) latestSpeakerPosition;
private List<float> faceDiffs = new List<float>();
private List<double> lipDiffs = new List<double>();
private NRRGBCamTexture camTexture;
private Texture2D mTex2d;
void Start()
{
detector = Dlib.GetFrontalFaceDetector();
sp = ShapePredictor.Deserialize(Application.dataPath + "/Plugins/shape_predictor_68_face_landmarks.dat");
net = DlibDotNet.Dnn.LossMetric.Deserialize(Application.dataPath + "/Plugins/dlib_face_recognition_resnet_model_v1.dat");
LoadKnownFaces();
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
mTex2d = camTexture.GetTexture();
activeButton.onClick.AddListener(ToggleActivation);
}
private IEnumerator RecognizeSpeaker() {
// 카메라 작동 중일 때 계속 실행
while(isActivated){
if(!isActivated || camTexture == null)
{
break;
}
var temp = new Mat();
// Get raw data from Texture2D
var array = mTex2d.GetRawTextureData();
Marshal.Copy(array, 0, temp.Data, array.Length);
var cimg = Dlib.LoadImageData<BgrPixel>(temp.Data, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()));
var faces = new List<Matrix<RgbPixel>>();
var shapes = new List<FullObjectDetection>();
foreach(var face in detector.Operator(cimg))
{
var shape = sp.Detect(img, face);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
shapes.Add(shape);
faces.Add(faceChip);
}
var faceDescriptors = net.Operator(faces);
for(int i = 0; i < faceDescriptors.Count; i++)
{
faceDiffs.Clear();
for(int j = 0; j < knownFaces.Count; j++)
{
var diff = Dlib.Length(knownFaces[j] - faceDescriptors[i]);
faceDiffs.Add(diff);
}
var minDiff = faceDiffs.Min();
var index = faceDiffs.IndexOf(minDiff);
var matchRate = 1 / (1 + minDiff);
if(matchRate > 0.5)
{
var eyeDistance = (shapes[i].GetPart(37) - shapes[i].GetPart(44)).Length;
var lipHeight = (shapes[i].GetPart(62) - shapes[i].GetPart(66)).Length;
var lipWidth = (shapes[i].GetPart(48) - shapes[i].GetPart(54)).Length;
(double, double) averageLength = movements[index].CheckMovement(lipWidth / eyeDistance * 100, lipHeight / eyeDistance * 100);
if(averageLength.Item1 > 2 || averageLength.Item2 > 2)
{
lipDiffs[index] = averageLength.Item1 + averageLength.Item2;
}
}
}
speakerName.text = knownNames[lipDiffs.IndexOf(lipDiffs.Min())];
}
yield return null;
}
public void LoadKnownFaces() {
debugLog.text = "load known faces";
string relativePath = "NRSDK/Demos/OverTheGlass/Scripts/faces";
string absolutePath = Application.dataPath + "/" + relativePath;
foreach (var file in System.IO.Directory.GetFiles(absolutePath, "*.jpg"))
{
var img = Dlib.LoadImageAsMatrix<RgbPixel>(file);
var faces = detector.Operator(img);
if (faces.Any())
{
// Detect face
var shape = sp.Detect(img, faces[0]);
var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);
// Get face descriptor
var faceDescriptor = net.Operator(faceChip);
// Add known face and name informations
knownFaces.Add(faceDescriptor.First());
knownNames.Add(System.IO.Path.GetFileNameWithoutExtension(file));
}
else
{
Debug.Log("No face found in " + file);
}
}
lipDiffs = new List<double>(knownFaces.Count);
return;
}
internal class LipMovement
{
private string name;
private System.Collections.Generic.Queue<double> widthDiffs;
private System.Collections.Generic.Queue<double> heightDiffs;
private double prevHeight;
private double prevWidth;
public LipMovement(string name)
{
this.name = name;
widthDiffs = new System.Collections.Generic.Queue<double>(3);
heightDiffs = new System.Collections.Generic.Queue<double>(3);
prevHeight = 0;
prevWidth = 0;
}
public (double, double) CheckMovement(double width, double height)
{
heightDiffs.Enqueue(Math.Abs(prevHeight - height));
widthDiffs.Enqueue(Math.Abs(prevWidth - width));
List<double> widthNumbers = new List<double>(widthDiffs);
List<double> heightNumbers = new List<double>(heightDiffs);
double widthAverage = widthNumbers.Sum() / widthNumbers.Count;
double heightAverage = heightNumbers.Sum() / heightNumbers.Count;
prevHeight = height;
prevWidth = width;
return (Math.Round(widthAverage, 3), Math.Round(heightAverage, 3));
}
}
public void ToggleActivation()
{
isActivated = !isActivated;
if (isActivated)
{
if(camTexture == null)
{
camTexture = new NRRGBCamTexture();
captureImage.texture = camTexture.GetTexture();
}
camTexture.Play();
captureImage.texture = camTexture.GetTexture();
StartCoroutine(RecognizeSpeaker());
}
else
{
camTexture?.Stop();
camTexture = null;
StopCoroutine(RecognizeSpeaker());
}
}
void OnDestroy()
{
camTexture?.Stop();
camTexture = null;
}
}
Output:
I checked that the while statement, LoadKnownFaces() camera are running fine, but foreach(var face in detector.Opertaor(img)) doesn't seem to be running.
The text was updated successfully, but these errors were encountered:
seoyeon22
changed the title
Problems with detector operation (using ar glass and unity and dlibdotnet)
Face recognition in real time (using ar glass and unity and dlibdotnet)
Jun 22, 2023
Summary of issue
Hi,
I'm working on a project that does face recognition in real-time using ar glass(nreal light).
'shape_predictor_68_face_landmarks.dat' and 'dlib_face_recognition_resnet_model_v1.dat' are loaded successfully, but there seems to be a problem in the part where the operator of detector is called.
I tried two ways to use the camera texture in dlibdotnet.
First, I used nreal light's rgbcamera to get texture2d and converted it to Matrix type, but FrontalFaceDetector.Operator() doesn't seem to be applied.
Second, I used data of type byte[] and the result was same.
So my point is
1. I want to know if real-time face recognition is possible.
2. I wonder if data in byte[] format or Unity's texture2d format is available on dlibdotnet.
Any advice about this issue would be appreciated!
Environment
The ar glass model is nreal light and I am working in unity environment.
Also, android phone model is samsung galaxy s22.
Code:
Output:
I checked that the while statement, LoadKnownFaces() camera are running fine, but foreach(var face in detector.Opertaor(img)) doesn't seem to be running.
The text was updated successfully, but these errors were encountered: