Skip to content

Commit 630894e

Browse files
committed
Update algorithm
1 parent dbe755d commit 630894e

File tree

5 files changed

+121
-74
lines changed

5 files changed

+121
-74
lines changed

gesture.py

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def check_finger_states(self, hand):
4242
label = hand['label']
4343
facing = hand['facing']
4444

45-
finger_states = [None] * 5
45+
self.finger_states = [None] * 5
4646
joint_angles = np.zeros((5,3)) # 5 fingers and 3 angles each
4747

4848
# wrist to index finger mcp
@@ -55,74 +55,89 @@ def check_finger_states(self, hand):
5555
joint_angles[i] = np.array(
5656
[calculate_thumb_angle(landmarks[joints[j:j+3]], label, facing) for j in range(3)]
5757
)
58-
finger_states[i] = get_finger_state(joint_angles[i], THUMB_THRESH)
58+
self.finger_states[i] = get_finger_state(joint_angles[i], THUMB_THRESH)
5959
else:
6060
joint_angles[i] = np.array(
6161
[calculate_angle(landmarks[joints[j:j+3]]) for j in range(3)]
6262
)
6363
d2 = two_landmark_distance(landmarks[joints[1]], landmarks[joints[4]])
64-
finger_states[i] = get_finger_state(joint_angles[i], NON_THUMB_THRESH)
64+
self.finger_states[i] = get_finger_state(joint_angles[i], NON_THUMB_THRESH)
6565

66-
if finger_states[i] == 0 and d2/d1 < BENT_RATIO_THRESH[i-1]:
67-
finger_states[i] = 1
66+
if self.finger_states[i] == 0 and d2/d1 < BENT_RATIO_THRESH[i-1]:
67+
self.finger_states[i] = 1
6868

69-
return finger_states
69+
return self.finger_states
7070

71-
def detect_gesture(self, img, mode, target_gesture='', draw=True):
71+
def detect_gesture(self, img, mode, draw=True):
7272
hands = self.hand_detector.detect_hands(img)
73-
if draw:
74-
self.hand_detector.draw_landmarks(img)
75-
detected_gesture = None
76-
target_detected = False
73+
self.detected_gesture = None
7774

7875
if hands:
7976
if mode == 'single':
8077
hand = hands[-1]
81-
ges = Gesture(hand['label'])
82-
finger_states = self.check_finger_states(hand)
78+
self.check_finger_states(hand)
79+
80+
wrist_angle = hand['wrist_angle']
81+
pt = hand['landmarks'][0]
82+
cv2.putText(img, f'{round(wrist_angle,2)}', (pt[0]+20,pt[1]+5), 0, 0.8, (0,255,0), 2)
83+
84+
for i in range(5):
85+
pt = hand['landmarks'][4*i+4]
86+
cv2.putText(img, f'{self.finger_states[i]}', (pt[0]-40,pt[1]+5), 0, 0.8, (0,255,0), 2)
87+
8388
if draw:
84-
draw_fingertips(hand['landmarks'], finger_states, img)
85-
86-
detected_gesture = map_gesture(hand['landmarks'],
87-
finger_states,
88-
hand['direction'],
89-
hand['boundary'],
90-
ges.gestures)
89+
self.draw_gesture_landmarks(img)
9190

92-
if detected_gesture:
93-
if target_gesture == '':
94-
draw_bounding_box(hand['landmarks'], detected_gesture, img)
95-
else:
96-
if detected_gesture == target_gesture:
97-
target_detected = True
98-
draw_bounding_box(hand['landmarks'], detected_gesture, img)
91+
ges = Gesture(hand['label'])
92+
self.detected_gesture = map_gesture(ges.gestures,
93+
self.finger_states,
94+
hand['landmarks'],
95+
hand['wrist_angle'],
96+
hand['direction'],
97+
hand['boundary'])
9998

10099
if mode == 'double' and len(hands) == 2:
101100
pass
102101

103-
return target_detected
102+
return self.detected_gesture
103+
104+
def draw_gesture_landmarks(self, img):
105+
hand = self.hand_detector.decoded_hands[-1]
106+
self.hand_detector.draw_landmarks(img)
107+
draw_fingertips(hand['landmarks'], self.finger_states, img)
108+
109+
def draw_gesture_box(self, img):
110+
hand = self.hand_detector.decoded_hands[-1]
111+
draw_bounding_box(hand['landmarks'], self.detected_gesture, img)
104112

105113

106-
def main(mode='single', target_gesture=''):
114+
def main(mode='single', target_gesture='all'):
107115
cap = cv2.VideoCapture(0)
108116
cap.set(3, CAM_W)
109117
cap.set(4, CAM_H)
110-
ges_detector = GestureDetector()
118+
window_name = 'Gesture detection'
119+
120+
max_hands = 1 if mode == 'single' else 2
121+
ges_detector = GestureDetector(max_num_hands=max_hands)
111122
ptime = 0
112123
ctime = 0
113124

114125
while True:
115126
_, img = cap.read()
116127
img = cv2.flip(img, 1)
117-
ges_detector.detect_gesture(img, mode, target_gesture)
128+
ges_detector.detect_gesture(img, mode)
129+
if ges_detector.detected_gesture:
130+
if target_gesture == 'all' or target_gesture == ges_detector.detected_gesture:
131+
ges_detector.draw_gesture_box(img)
118132

119133
ctime = time.time()
120134
fps = 1 / (ctime - ptime)
121135
ptime = ctime
122136

123-
cv2.putText(img, f'FPS: {int(fps)}', (50,38), 0, 0.8, TEXT_COLOR, 2, lineType=cv2.LINE_AA)
124-
125-
cv2.imshow('Gesture detection', img)
137+
cv2.putText(img, f'FPS: {int(fps)}', (50,38), 0, 0.8,
138+
TEXT_COLOR, 2, lineType=cv2.LINE_AA)
139+
140+
cv2.imshow(window_name, img)
126141
key = cv2.waitKey(1)
127142
if key == ord('q'):
128143
cv2.destroyAllWindows()
@@ -133,8 +148,8 @@ def main(mode='single', target_gesture=''):
133148
parser = argparse.ArgumentParser()
134149
parser.add_argument('--mode', type=str, default='single',
135150
help='single/double-hand gestures (default: single)')
136-
parser.add_argument('--target_gesture', type=str, default='',
137-
help='detect a specific gesture (default: empty)')
151+
parser.add_argument('--target_gesture', type=str, default='all',
152+
help='detect a specific gesture (default: all)')
138153
opt = parser.parse_args()
139154

140155
main(**vars(opt))

hand.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import numpy as np
1313

1414
from utils.utils import check_hand_direction, find_boundary_lm
15+
from utils.utils import calculate_angle
1516

1617

1718
CAM_W = 640
@@ -20,12 +21,13 @@
2021

2122

2223
# A hand detector based on mediapipe. It detects hands and returns several features for each hand:
23-
# 'index' - the index number of hands, -1 is the firstly detected one
24-
# 'label' - handedness of hands, 'left', 'right'
25-
# 'landmarks' - the coordinates of 21 hand joints
26-
# 'direction' - the direction that a hand is pointing, 'up', 'down', 'left', 'right'
27-
# 'facing' - the facing of hands, 'front', 'back' ('front' means the palm is facing the camera)
28-
# 'boundary' - the boundary joints from 'up', 'down', 'left', 'right'
24+
# 'index' - the index number of the hand, -1 is the first one detected
25+
# 'label' - handedness of hands, 'left', 'right'
26+
# 'landmarks' - the coordinates of 21 hand joints
27+
# 'wrist_angle' - angle of <index finger mcp, wrist, pinky mcp>
28+
# 'direction' - the direction that a hand is pointing, 'up', 'down', 'left', 'right'
29+
# 'facing' - the facing of hands, 'front', 'back' ('front' means the palm is facing the camera)
30+
# 'boundary' - the boundary joints from 'up', 'down', 'left', 'right'
2931
class HandDetector:
3032
def __init__(self, static_image_mode=False, max_num_hands=2,
3133
min_detection_confidence=0.8, min_tracking_confidence=0.5):
@@ -44,22 +46,22 @@ def __init__(self, static_image_mode=False, max_num_hands=2,
4446
self.min_tracking_confidence)
4547

4648
def detect_hands(self, img):
47-
decoded_hands = None
49+
self.decoded_hands = None
4850
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
4951
self.results = self.hands.process(img_rgb)
5052

5153
if self.results.multi_hand_landmarks:
5254
h, w, _ = img.shape
5355
num_hands = len(self.results.multi_hand_landmarks)
54-
decoded_hands = [None] * num_hands
56+
self.decoded_hands = [None] * num_hands
5557

5658
for i in range(num_hands):
57-
decoded_hands[i] = dict()
59+
self.decoded_hands[i] = dict()
5860
handedness = self.results.multi_handedness[i]
5961
hand_landmarks = self.results.multi_hand_landmarks[i]
6062

61-
decoded_hands[i]['index'] = handedness.classification[0].index
62-
decoded_hands[i]['label'] = handedness.classification[0].label.lower()
63+
self.decoded_hands[i]['index'] = handedness.classification[0].index
64+
self.decoded_hands[i]['label'] = handedness.classification[0].label.lower()
6365

6466
lm_list = list()
6567
wrist_z = hand_landmarks.landmark[0].z
@@ -71,15 +73,19 @@ def detect_hands(self, img):
7173
lm_list.append([cx, cy, cz])
7274

7375
lm_array = np.array(lm_list)
74-
direction, facing = check_hand_direction(lm_array, decoded_hands[i]['label'])
76+
direction, facing = check_hand_direction(lm_array, self.decoded_hands[i]['label'])
7577
boundary = find_boundary_lm(lm_array)
7678

77-
decoded_hands[i]['landmarks'] = lm_array
78-
decoded_hands[i]['direction'] = direction
79-
decoded_hands[i]['facing'] = facing
80-
decoded_hands[i]['boundary'] = boundary
79+
wrist_angle_joints = lm_array[[5, 0, 17]]
80+
wrist_angle = calculate_angle(wrist_angle_joints)
81+
82+
self.decoded_hands[i]['landmarks'] = lm_array
83+
self.decoded_hands[i]['wrist_angle'] = wrist_angle
84+
self.decoded_hands[i]['direction'] = direction
85+
self.decoded_hands[i]['facing'] = facing
86+
self.decoded_hands[i]['boundary'] = boundary
8187

82-
return decoded_hands
88+
return self.decoded_hands
8389

8490
def draw_landmarks(self, img):
8591
w = img.shape[1]
@@ -102,16 +108,17 @@ def main(max_hands=2):
102108
while True:
103109
_, img = cap.read()
104110
img = cv2.flip(img, 1)
105-
decoded_hands = detector.detect_hands(img)
111+
detector.detect_hands(img)
106112
detector.draw_landmarks(img)
107113

108114
ctime = time.time()
109115
fps = 1 / (ctime - ptime)
110116
ptime = ctime
111117

112-
cv2.putText(img, f'FPS: {int(fps)}', (30,40), 0, 0.8, TEXT_COLOR , 2)
113-
if decoded_hands:
114-
cv2.putText(img, f'Number of hands detected: {len(decoded_hands)}',
118+
cv2.putText(img, f'FPS: {int(fps)}', (30,40), 0, 0.8,
119+
TEXT_COLOR , 2, lineType=cv2.LINE_AA)
120+
if detector.decoded_hands:
121+
cv2.putText(img, f'Number of hands detected: {len(detector.decoded_hands)}',
115122
(30,70), 0, 0.8, TEXT_COLOR , 2)
116123

117124
cv2.imshow('Hand detection', img)

utils/templates.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,63 +4,83 @@ def __init__(self, label):
44
self.gestures = {
55
'One': {'finger states': [[2], [0], [3, 4], [3, 4], [3, 4]],
66
'direction': 'up',
7+
'wrist angle': [0.65, 0.85],
78
'overlap': None,
89
'boundary': None},
910
'Two': {'finger states': [[2], [0], [0], [3, 4], [3, 4]],
1011
'direction': 'up',
12+
'wrist angle': [0.75, 0.95],
1113
'overlap': None,
1214
'boundary': None},
1315
'Three': {'finger states': [[2], [0], [0], [0], [3, 4]],
1416
'direction': 'up',
17+
'wrist angle': [0.70, 0.90],
1518
'overlap': None,
1619
'boundary': None},
1720
'Four': {'finger states': [[2], [0], [0], [0], [0]],
1821
'direction': 'up',
22+
'wrist angle': [0.70, 0.90],
1923
'overlap': None,
2024
'boundary': None},
2125
'Five': {'finger states': [[0], [0], [0], [0], [0]],
2226
'direction': 'up',
27+
'wrist angle': [0.70, 0.90],
2328
'overlap': None,
2429
'boundary': None},
2530
'Six': {'finger states': [[0], [3, 4], [3, 4], [3, 4], [0]],
2631
'direction': 'up',
32+
'wrist angle': [0.70, 0.90],
2733
'overlap': None,
2834
'boundary': None},
2935
'Seven': {'finger states': [[1], [1], [1], [3, 4], [3, 4]],
3036
'direction': 'up',
37+
'wrist angle': [0.55, 0.90],
3138
'overlap': None,
3239
'boundary': None},
3340
'Eight': {'finger states': [[0], [0], [3, 4], [3, 4], [3, 4]],
3441
'direction': 'up',
42+
'wrist angle': [0.75, 0.95],
3543
'overlap': None,
3644
'boundary': None},
3745
'Nine': {'finger states': [[1, 2], [2], [3, 4], [3, 4], [3, 4]],
3846
'direction': 'up',
47+
'wrist angle': [0, 0.50],
3948
'overlap': None,
4049
'boundary': None},
4150
'Ten': {'finger states': [[2], [4], [4], [4], [4]],
4251
'direction': 'up',
52+
'wrist angle': [0.75, 0.95],
4353
'overlap': None,
4454
'boundary': None},
4555
'Claw': {'finger states': [[1], [1, 2], [1, 2], [1, 2], [1, 2]],
4656
'direction': 'up',
57+
'wrist angle': [0.75, 0.95],
4758
'overlap': None,
4859
'boundary': None},
4960
'Thumbs-up': {'finger states': [[0], [3, 4], [3, 4], [3, 4], [3, 4]],
5061
'direction': 'left' if label == 'right' else 'right',
62+
'wrist angle': [0.80, 1.50],
5163
'overlap': None,
5264
'boundary': {3: 4}},
5365
'Thumbs-down': {'finger states': [[0], [3, 4], [3, 4], [3, 4], [3, 4]],
5466
'direction': 'left' if label == 'right' else 'right',
67+
'wrist angle': [0.70, 0.90],
5568
'overlap': None,
5669
'boundary': {2: 4}},
5770
'Rock': {'finger states': [[2], [0], [3, 4], [3, 4], [0]],
58-
'direction': 'up',
59-
'overlap': None,
71+
'direction': 'up',
72+
'wrist angle': [0.70, 0.90],
73+
'overlap': None,
6074
'boundary': None},
6175
'OK': {'finger states': [[1], [2], [0], [0], [0]],
6276
'direction': 'up',
77+
'wrist angle': [0.30, 0.60],
6378
'overlap': [[4, 8]],
64-
'boundary': None}
79+
'boundary': None},
80+
'C': {'finger states': [[0], [1], [1], [1], [1]],
81+
'direction': 'up',
82+
'wrist angle': [0, 0.35],
83+
'overlap': None,
84+
'boundary': {0: 0} if label == 'right' else {1: 0}}
6585
}
6686

utils/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def get_finger_state(joint_angles, threshold):
131131
return finger_state
132132

133133

134-
def map_gesture(landmarks, finger_states, direction, boundary, gestures):
134+
def map_gesture(gestures, finger_states, landmarks, wrist_angle, direction, boundary):
135135
""" Map detected gesture features to a pre-defined gesture template. """
136136
detected_gesture = None
137137
d = two_landmark_distance(landmarks[0], landmarks[5])
@@ -147,6 +147,9 @@ def map_gesture(landmarks, finger_states, direction, boundary, gestures):
147147
break
148148
if flag == 0:
149149
count += 1
150+
# check wrist angle
151+
if temp['wrist angle'][0] < wrist_angle < temp['wrist angle'][1]:
152+
count += 1
150153
# check direction
151154
if temp['direction'] == direction:
152155
count += 1
@@ -173,7 +176,7 @@ def map_gesture(landmarks, finger_states, direction, boundary, gestures):
173176
if flag == 0:
174177
count += 1
175178

176-
if count == 4:
179+
if count == 5:
177180
detected_gesture = ges
178181
break
179182

0 commit comments

Comments
 (0)