
Hand detect data stream #813

Open · wants to merge 3 commits into main
2 changes: 2 additions & 0 deletions .gitignore
@@ -2,5 +2,7 @@ build/

sdk/include/aditof/version.h

.idea/

*.temp/
.vscode*
182 changes: 182 additions & 0 deletions bindings/python/examples/gesture_rec/notebook.py
@@ -0,0 +1,182 @@

from tkinter import *
from tkinter import ttk
import aditofpython as tof
import time
import concurrent.futures
import numpy as np
import cv2 as cv
from enum import Enum
from process import ProcessTab

smallSignalThreshold = 100
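# smallSignalThreshold: depth pixels whose IR signal falls below this value are
# dropped by the camera's noise reduction (applied via setControl() in _start_video)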


class ModesEnum(Enum):
MODE_NEAR = 0
MODE_MEDIUM = 1
MODE_FAR = 2


def calc_process(depth_map):
process = ProcessTab(depth_map)
bounding_box = process.max_area.bbox
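    # skimage bounding boxes are (min_row, min_col, max_row, max_col), while
    # OpenCV expects (x, y) corner points, hence the swapped indices below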

return process.resultVar, (bounding_box[1], bounding_box[0]), (bounding_box[3], bounding_box[2])


class GestureDemo(Frame):

def __init__(self, name='gesturedemo'):
Frame.__init__(self, name=name)
self.pack(expand=Y, fill=BOTH)
self.master.title('Gesture Demo')
self.resultVar = StringVar()
self.box_start_point = (0, 0)
self.box_end_point = (0, 0)
self._create_main_panel()

def _create_main_panel(self):

main_panel = Frame(self, name='demo')
main_panel.pack(side=TOP, fill=BOTH, expand=Y)

# create the notebook
nb = ttk.Notebook(main_panel, name='gesturedemo')

nb.pack(fill=BOTH, expand=Y, padx=2, pady=3)

self._create_video_tab(nb)

# =============================================================================
def _create_video_tab(self, nb):
# frame to hold content
frame = Frame(nb, name='video')
# widgets to be displayed on Video tab
msg = ["Capture an image for processing"]
lbl = Label(frame, justify=LEFT, anchor=N,
text=''.join(msg))
lbl_frame = LabelFrame(frame, bg='red')
btn = Button(frame, text='Init. Dev', underline=0,
command=lambda: self._init_dev())
btn_start = Button(frame, text='Start Video', underline=0,
command=lambda: self._start_video())

# position and set resize behaviour
lbl.grid(row=0, column=0)
lbl_frame.grid(row=0, column=1, columnspan=4)
btn.grid(row=1, column=0, pady=(2, 4))
btn_start.grid(row=2, column=0, pady=(2, 4))

self.resultVar.set("How many fingers?")
lbl_result = Label(frame, textvariable=self.resultVar, name='result')
lbl_result.grid(row=3, column=0)

nb.add(frame, text='Video', padding=2)

# =============================================================================
def _init_dev(self):
system = tof.System()
print(system)

self.cameras = []

        status = system.getCameraList(self.cameras)
        if not status:
            print("system.getCameraList() failed with status: ", status)
        if len(self.cameras) == 0:
            print("No cameras found")
            return

status = self.cameras[0].initialize()
if not status:
print("cameras[0].initialize() failed with status: ", status)

modes = []
status = self.cameras[0].getAvailableModes(modes)
if not status:
print("system.getAvailableModes() failed with status: ", status)

types = []
status = self.cameras[0].getAvailableFrameTypes(types)
if not status:
print("system.getAvailableFrameTypes() failed with status: ", status)

# Use only depth image for faster conversion
status = self.cameras[0].setFrameType(types[1])
if not status:
print("cameras[0].setFrameType() failed with status:", status)

status = self.cameras[0].setMode(modes[ModesEnum.MODE_NEAR.value])
if not status:
print("cameras[0].setMode() failed with status: ", status)

# =============================================================================
def _start_video(self):
cam_details = tof.CameraDetails()
status = self.cameras[0].getDetails(cam_details)
if not status:
print("system.getDetails() failed with status: ", status)

# Enable noise reduction for better results
self.cameras[0].setControl("noise_reduction_threshold", str(smallSignalThreshold))

camera_range = cam_details.depthParameters.maxDepth
distance_scale = 255.0 / camera_range
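        # e.g. a near-mode maxDepth of 800 (mm) would give distance_scale ~= 0.32,
        # mapping raw depth values onto the 8-bit range used for display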
tof_frame = tof.Frame()
computation_delay_start = time.time()
process_results = []
executor = concurrent.futures.ProcessPoolExecutor()

while True:
            # Exit on any key press; waitKey returns -1 (255 after masking)
            # when no key is pressed
            if (cv.waitKey(1) & 0xFF) != 255: break

# Capture frame-by-frame
status = self.cameras[0].requestFrame(tof_frame)
if not status:
print("cameras[0].requestFrame() failed with status: ", status)

depth_map = np.array(tof_frame.getData(tof.FrameDataType.Depth), dtype="uint16", copy=False)
# Creation of the Depth image
depth_map = depth_map[0: 480, 0:640]
depth_map = cv.flip(depth_map, 1)
depth_map = distance_scale * depth_map
depth_map = np.uint8(depth_map)

# Image to display
img = cv.applyColorMap(depth_map, cv.COLORMAP_RAINBOW)
cv.rectangle(img, self.box_start_point, self.box_end_point, (0, 255, 0), 15)
cv.putText(img, self.resultVar.get(), tuple(coord + 5 for coord in self.box_start_point),
cv.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255))
cv.namedWindow('Depth image', cv.WINDOW_AUTOSIZE)
cv.imshow('Depth image', img)

            # Refresh the result label with any computations that have finished
            process_results = self.update_display(process_results)

# Process image every 1s
if (time.time() - computation_delay_start) <= 1: continue

p = executor.submit(calc_process, depth_map)
process_results.append(p)
computation_delay_start = time.time()

executor.shutdown()
cv.destroyWindow("Depth image")

def update_display(self, process_results):
to_delete = []
        # Poll without blocking; as_completed() would stall the video loop
        # until every pending future had finished
        for p in process_results:
            if not p.done():
                continue
try:
result, self.box_start_point, self.box_end_point = p.result()
self.resultVar.set(result)
except Exception as e:
self.resultVar.set("None")
print("Exception:", e)
finally:
self.update()
to_delete.append(p)

return [p for p in process_results if p not in to_delete]


if __name__ == '__main__':
GestureDemo().mainloop()
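
For reviewers without the ToF camera attached, the processing half of the pipeline can be exercised offline. A minimal sketch, assuming an 8-bit depth image saved to disk (the filename is hypothetical):

    import cv2 as cv
    from process import ProcessTab

    depth_map = cv.imread("depth_sample.png", cv.IMREAD_GRAYSCALE)
    process = ProcessTab(depth_map)
    print(process.resultVar)       # e.g. "Found 2 extended fingers. Scissors"
    print(process.max_area.bbox)   # bounding box of the detected hand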
131 changes: 131 additions & 0 deletions bindings/python/examples/gesture_rec/process.py
@@ -0,0 +1,131 @@
import numpy as np
from skimage import measure
from scipy.spatial import ConvexHull
from scipy.ndimage import distance_transform_edt

# These are empirical values, which were found by trial and error
pixel_no_threshold = 200
analyzed_region_distance = 38.25
hand_radius_error = 1.9


class ProcessTab:

def __init__(self, img_obj):
self.stop_distance = 0.3
self.depth_img = img_obj
self.resultVar = "How many fingers?"
self._display_result()

# =============================================================================
def _display_result(self):
self._depth_img_hist()
self._detect_hand()
self._count_fingers()

# =============================================================================
def _depth_img_hist(self):
# hist[0] = number of elements
# hist[1] = distance normalized to 255 (this was done in notebook.py)
counts, bins = np.histogram(self.depth_img.ravel(), 512)
hist = [counts, bins]

start = 1
while start < (len(hist[0]) - 2) and hist[0][start] < pixel_no_threshold:
start += 1
bin_step = hist[1][start + 1] - hist[1][start]

stop = int((hist[1][start] + analyzed_region_distance - hist[1][0]) / bin_step)
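        # e.g. with 8-bit depth values spread over 512 bins, bin_step is roughly
        # 0.5, so analyzed_region_distance = 38.25 keeps about 77 bins
        # (~38 depth units) beyond the first well-populated bin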

self.stop_distance = hist[1][stop]
# Analyze the pixel only if it is closer than a threshold
self.binary_img = (self.depth_img < self.stop_distance) * 1

# =============================================================================
def _detect_hand(self):
# Find all the objects in the image and select the largest one, which is the hand
labels = measure.label(self.binary_img)
props = measure.regionprops(labels)
if len(props) == 0:
raise Exception("No object found.")

props.sort(key=lambda x: x.area, reverse=True)
self.max_area = props[0]
points = props[0].filled_image

        # points_hull[0] = row indices; points_hull[1] = column indices
points_hull = np.where(points == 1)
self.cord = list(zip(points_hull[0], points_hull[1]))

# Compute the distance of non-zero points (hand) to the nearest zero point (background)
self.dist_map = distance_transform_edt(points)
# Indices of hand center, i.e. the point farthest from the background
self.hand_center = tuple(arr[0] for arr in np.where(self.dist_map == np.max(self.dist_map)))
self.radius = hand_radius_error * np.max(self.dist_map)

# =============================================================================
def _count_fingers(self):
# Find the convex hull = contour of the hand with extremities
hull = ConvexHull(self.cord)
cord_arr = np.array(self.cord)
vertices = cord_arr[hull.vertices]

        # delta_x and delta_y between two consecutive vertices
dist = np.append(vertices[0:len(vertices) - 1] - vertices[1:len(vertices)],
[vertices[-1] - vertices[0]], axis=0)

        # Euclidean distance between two consecutive vertices
        # (all cdist variables are in pixels)
cdist = np.sqrt(dist[:, 0] ** 2 + dist[:, 1] ** 2)

        # TODO: use a better formula
        # Makes cdist_threshold inversely proportional to stop_distance,
        # while keeping it between 0 and 25
cdist_threshold = np.sqrt(1 - self.stop_distance / 255) * 25
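        # e.g. stop_distance = 64 gives sqrt(1 - 64/255) * 25 ~= 21.6 pixels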
cdist_bin = (cdist <= cdist_threshold) * 1

# Used to check whether a cdist smaller than the threshold
# is following a cdist bigger than the threshold
cdist_diff = np.append(cdist_bin[0:len(cdist_bin) - 1] - cdist_bin[1:len(cdist_bin)],
[cdist_bin[-1] - cdist_bin[0]], axis=0)

# Indices of vertices which correspond to fingertips
dist_idx = np.where((cdist_diff == -1) | ((cdist_diff == 0) & (cdist_bin == 0)))
dist_idx = np.array(dist_idx) + 1
        # np.where returns a tuple of arrays, so keep only the first row
dist_idx = dist_idx[0]
if dist_idx[-1] == len(vertices):
dist_idx[-1] = 0
        # Move a wrapped-around 0 index to the front
dist_idx.sort()

# From vertices close to each other select the one which is farthest from the center
# Compute the possible fingertips distances to the center of the hand
finger_cdist = [0] * len(dist_idx)
for i in range(len(dist_idx)):
stop = dist_idx[i+1] if (i+1 < len(dist_idx)) else len(vertices)
for j in range(dist_idx[i], stop):
vertex_dist_from_center = vertices[j] - self.hand_center
vertex_cdist_from_center = np.sqrt(vertex_dist_from_center[0] ** 2 +
vertex_dist_from_center[1] ** 2)
if finger_cdist[i] < vertex_cdist_from_center:
finger_cdist[i] = vertex_cdist_from_center
dist_idx[i] = j

# Select actual fingertips
self.fingers = np.where(np.logical_and(finger_cdist > self.radius,
vertices[dist_idx, 0] < (self.hand_center[0] + self.radius)))

self.detect_gesture()

# =============================================================================
def detect_gesture(self):
if len(self.fingers[0]) == 5:
self.resultVar = "Found 5 extended fingers.Paper"
elif len(self.fingers[0]) == 2 or len(self.fingers[0]) == 3:
self.resultVar = "Found " + str(len(self.fingers[0])) + " extended fingers. Scissors"
elif len(self.fingers[0]) == 0:
self.resultVar = "Found " + str(len(self.fingers[0])) + " extended fingers. Rock"
else:
self.resultVar = "Found " + str(len(self.fingers[0])) + " extended fingers. Unknown gesture"