
Hand detect data stream #813

Open · wants to merge 3 commits into main
2 changes: 2 additions & 0 deletions .gitignore
@@ -2,5 +2,7 @@ build/

sdk/include/aditof/version.h

.idea/

*.temp/
.vscode*
182 changes: 182 additions & 0 deletions bindings/python/examples/gesture_rec/notebook.py
@@ -0,0 +1,182 @@

from tkinter import *
from tkinter import ttk
import aditofpython as tof
import time
import concurrent.futures
import numpy as np
import cv2 as cv
from enum import Enum
from process import ProcessTab

smallSignalThreshold = 100
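# smallSignalThreshold: depth pixels whose IR signal falls below this value are
# dropped by the camera's noise reduction (applied via setControl() in _start_video)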


class ModesEnum(Enum):
MODE_NEAR = 0
MODE_MEDIUM = 1
MODE_FAR = 2


def calc_process(depth_map):
process = ProcessTab(depth_map)
bounding_box = process.max_area.bbox
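    # skimage bounding boxes are (min_row, min_col, max_row, max_col), while
    # OpenCV expects (x, y) corner points, hence the swapped indices below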

return process.resultVar, (bounding_box[1], bounding_box[0]), (bounding_box[3], bounding_box[2])


class GestureDemo(Frame):

def __init__(self, name='gesturedemo'):
Frame.__init__(self, name=name)
self.pack(expand=Y, fill=BOTH)
self.master.title('Gesture Demo')
self.resultVar = StringVar()
self.box_start_point = (0, 0)
self.box_end_point = (0, 0)
self._create_main_panel()

def _create_main_panel(self):

main_panel = Frame(self, name='demo')
main_panel.pack(side=TOP, fill=BOTH, expand=Y)

# create the notebook
nb = ttk.Notebook(main_panel, name='gesturedemo')

nb.pack(fill=BOTH, expand=Y, padx=2, pady=3)

self._create_video_tab(nb)

# =============================================================================
def _create_video_tab(self, nb):
# frame to hold content
frame = Frame(nb, name='video')
# widgets to be displayed on Video tab
msg = ["Capture an image for processing"]
lbl = Label(frame, justify=LEFT, anchor=N,
text=''.join(msg))
lbl_frame = LabelFrame(frame, bg='red')
btn = Button(frame, text='Init. Dev', underline=0,
command=lambda: self._init_dev())
btn_start = Button(frame, text='Start Video', underline=0,
command=lambda: self._start_video())

# position and set resize behaviour
lbl.grid(row=0, column=0)
lbl_frame.grid(row=0, column=1, columnspan=4)
btn.grid(row=1, column=0, pady=(2, 4))
btn_start.grid(row=2, column=0, pady=(2, 4))

self.resultVar.set("How many fingers?")
lbl_result = Label(frame, textvariable=self.resultVar, name='result')
lbl_result.grid(row=3, column=0)

nb.add(frame, text='Video', padding=2)

# =============================================================================
def _init_dev(self):
system = tof.System()
print(system)

self.cameras = []

        status = system.getCameraList(self.cameras)
        if not status:
            print("system.getCameraList() failed with status: ", status)
        if len(self.cameras) == 0:
            print("No cameras found")
            return

status = self.cameras[0].initialize()
if not status:
print("cameras[0].initialize() failed with status: ", status)

modes = []
status = self.cameras[0].getAvailableModes(modes)
if not status:
print("system.getAvailableModes() failed with status: ", status)

types = []
status = self.cameras[0].getAvailableFrameTypes(types)
if not status:
print("system.getAvailableFrameTypes() failed with status: ", status)

# Use only depth image for faster conversion
status = self.cameras[0].setFrameType(types[1])
if not status:
print("cameras[0].setFrameType() failed with status:", status)

status = self.cameras[0].setMode(modes[ModesEnum.MODE_NEAR.value])
if not status:
print("cameras[0].setMode() failed with status: ", status)

# =============================================================================
def _start_video(self):
cam_details = tof.CameraDetails()
status = self.cameras[0].getDetails(cam_details)
if not status:
print("system.getDetails() failed with status: ", status)

# Enable noise reduction for better results
self.cameras[0].setControl("noise_reduction_threshold", str(smallSignalThreshold))

camera_range = cam_details.depthParameters.maxDepth
distance_scale = 255.0 / camera_range
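        # e.g. a near-mode maxDepth of 800 (mm) would give distance_scale ~= 0.32,
        # mapping raw depth values onto the 8-bit range used for display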
tof_frame = tof.Frame()
computation_delay_start = time.time()
process_results = []
executor = concurrent.futures.ProcessPoolExecutor()

while True:
            # Exit on any key press; waitKey returns -1 (255 after masking)
            # when no key is pressed
            if (cv.waitKey(1) & 0xFF) != 255: break

# Capture frame-by-frame
status = self.cameras[0].requestFrame(tof_frame)
if not status:
print("cameras[0].requestFrame() failed with status: ", status)

depth_map = np.array(tof_frame.getData(tof.FrameDataType.Depth), dtype="uint16", copy=False)
# Creation of the Depth image
depth_map = depth_map[0: 480, 0:640]
depth_map = cv.flip(depth_map, 1)
depth_map = distance_scale * depth_map
depth_map = np.uint8(depth_map)

# Image to display
img = cv.applyColorMap(depth_map, cv.COLORMAP_RAINBOW)
cv.rectangle(img, self.box_start_point, self.box_end_point, (0, 255, 0), 15)
cv.putText(img, self.resultVar.get(), tuple(coord + 5 for coord in self.box_start_point),
cv.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255))
cv.namedWindow('Depth image', cv.WINDOW_AUTOSIZE)
cv.imshow('Depth image', img)

            # Refresh the result label with any computations that have finished
            process_results = self.update_display(process_results)

# Process image every 1s
if (time.time() - computation_delay_start) <= 1: continue

p = executor.submit(calc_process, depth_map)
process_results.append(p)
computation_delay_start = time.time()

executor.shutdown()
cv.destroyWindow("Depth image")

def update_display(self, process_results):
to_delete = []
        # Poll without blocking; as_completed() would stall the video loop
        # until every pending future had finished
        for p in process_results:
            if not p.done():
                continue
try:
result, self.box_start_point, self.box_end_point = p.result()
self.resultVar.set(result)
except Exception as e:
self.resultVar.set("None")
print("Exception:", e)
finally:
self.update()
to_delete.append(p)

return [p for p in process_results if p not in to_delete]


if __name__ == '__main__':
GestureDemo().mainloop()
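
For reviewers without the ToF camera attached, the processing half of the pipeline can be exercised offline. A minimal sketch, assuming an 8-bit depth image saved to disk (the filename is hypothetical):

    import cv2 as cv
    from process import ProcessTab

    depth_map = cv.imread("depth_sample.png", cv.IMREAD_GRAYSCALE)
    process = ProcessTab(depth_map)
    print(process.resultVar)       # e.g. "Found 2 extended fingers. Scissors"
    print(process.max_area.bbox)   # bounding box of the detected hand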
131 changes: 131 additions & 0 deletions bindings/python/examples/gesture_rec/process.py
@@ -0,0 +1,131 @@
import numpy as np
from skimage import measure
from scipy.spatial import ConvexHull
from scipy.ndimage import distance_transform_edt

# These are empirical values, which were found by trial and error
pixel_no_threshold = 200
analyzed_region_distance = 38.25
hand_radius_error = 1.9


class ProcessTab:

def __init__(self, img_obj):
self.stop_distance = 0.3
self.depth_img = img_obj
self.resultVar = "How many fingers?"
self._display_result()

# =============================================================================
def _display_result(self):
self._depth_img_hist()
self._detect_hand()
self._count_fingers()

# =============================================================================
def _depth_img_hist(self):
# hist[0] = number of elements
# hist[1] = distance normalized to 255 (this was done in notebook.py)
counts, bins = np.histogram(self.depth_img.ravel(), 512)
hist = [counts, bins]

start = 1
while start < (len(hist[0]) - 2) and hist[0][start] < pixel_no_threshold:
start += 1
bin_step = hist[1][start + 1] - hist[1][start]

stop = int((hist[1][start] + analyzed_region_distance - hist[1][0]) / bin_step)
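        # e.g. with 8-bit depth values spread over 512 bins, bin_step is roughly
        # 0.5, so analyzed_region_distance = 38.25 keeps about 77 bins
        # (~38 depth units) beyond the first well-populated bin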

self.stop_distance = hist[1][stop]
# Analyze the pixel only if it is closer than a threshold
self.binary_img = (self.depth_img < self.stop_distance) * 1

# =============================================================================
def _detect_hand(self):
# Find all the objects in the image and select the largest one, which is the hand
labels = measure.label(self.binary_img)
props = measure.regionprops(labels)
if len(props) == 0:
raise Exception("No object found.")

props.sort(key=lambda x: x.area, reverse=True)
self.max_area = props[0]
points = props[0].filled_image

        # points_hull[0] = row indices; points_hull[1] = column indices
points_hull = np.where(points == 1)
self.cord = list(zip(points_hull[0], points_hull[1]))

# Compute the distance of non-zero points (hand) to the nearest zero point (background)
self.dist_map = distance_transform_edt(points)
# Indices of hand center, i.e. the point farthest from the background
self.hand_center = tuple(arr[0] for arr in np.where(self.dist_map == np.max(self.dist_map)))
self.radius = hand_radius_error * np.max(self.dist_map)

# =============================================================================
def _count_fingers(self):
# Find the convex hull = contour of the hand with extremities
hull = ConvexHull(self.cord)
cord_arr = np.array(self.cord)
vertices = cord_arr[hull.vertices]

        # delta_x and delta_y between two consecutive vertices
dist = np.append(vertices[0:len(vertices) - 1] - vertices[1:len(vertices)],
[vertices[-1] - vertices[0]], axis=0)

        # Euclidean distance between two consecutive vertices
        # (all cdist variables are in pixels)
cdist = np.sqrt(dist[:, 0] ** 2 + dist[:, 1] ** 2)

        # TODO: use a better formula
        # Makes cdist_threshold inversely proportional to stop_distance,
        # while keeping it between 0 and 25
cdist_threshold = np.sqrt(1 - self.stop_distance / 255) * 25
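        # e.g. stop_distance = 64 gives sqrt(1 - 64/255) * 25 ~= 21.6 pixels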
cdist_bin = (cdist <= cdist_threshold) * 1

# Used to check whether a cdist smaller than the threshold
# is following a cdist bigger than the threshold
cdist_diff = np.append(cdist_bin[0:len(cdist_bin) - 1] - cdist_bin[1:len(cdist_bin)],
[cdist_bin[-1] - cdist_bin[0]], axis=0)

# Indices of vertices which correspond to fingertips
dist_idx = np.where((cdist_diff == -1) | ((cdist_diff == 0) & (cdist_bin == 0)))
dist_idx = np.array(dist_idx) + 1
        # np.where returns a tuple of arrays, so keep only the first row
dist_idx = dist_idx[0]
if dist_idx[-1] == len(vertices):
dist_idx[-1] = 0
        # Move a wrapped-around 0 index to the front
dist_idx.sort()

# From vertices close to each other select the one which is farthest from the center
# Compute the possible fingertips distances to the center of the hand
finger_cdist = [0] * len(dist_idx)
for i in range(len(dist_idx)):
stop = dist_idx[i+1] if (i+1 < len(dist_idx)) else len(vertices)
for j in range(dist_idx[i], stop):
vertex_dist_from_center = vertices[j] - self.hand_center
vertex_cdist_from_center = np.sqrt(vertex_dist_from_center[0] ** 2 +
vertex_dist_from_center[1] ** 2)
if finger_cdist[i] < vertex_cdist_from_center:
finger_cdist[i] = vertex_cdist_from_center
dist_idx[i] = j

# Select actual fingertips
self.fingers = np.where(np.logical_and(finger_cdist > self.radius,
vertices[dist_idx, 0] < (self.hand_center[0] + self.radius)))

self.detect_gesture()

# =============================================================================
def detect_gesture(self):
if len(self.fingers[0]) == 5:
self.resultVar = "Found 5 extended fingers.Paper"
elif len(self.fingers[0]) == 2 or len(self.fingers[0]) == 3:
self.resultVar = "Found " + str(len(self.fingers[0])) + " extended fingers. Scissors"
elif len(self.fingers[0]) == 0:
self.resultVar = "Found " + str(len(self.fingers[0])) + " extended fingers. Rock"
else:
self.resultVar = "Found " + str(len(self.fingers[0])) + " extended fingers. Unknown gesture"