inference_classifier.py

import pickle
import cv2
import mediapipe as mp
import numpy as np
import pyttsx3
import serial
import time

ser = serial.Serial("COM7", 9600)  # change the port name (COM...) and the baud rate to match your setup
# ser.open()
# ser.reset_input_buffer()
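
# the serial link is used after the capture loop to forward the spelled-out
# word to an external device (presumably a microcontroller listening on COM7)
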
model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']
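# model.p is assumed to hold a pickled dict written by the companion training
# script, with the fitted classifier stored under the 'model' key
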
cap = cv2.VideoCapture(1)
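# index 1 selects the second camera; use 0 if only the built-in webcam is available
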
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
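# static_image_mode=True re-runs full detection on every frame; False would let
# MediaPipe track landmarks between frames, which is usually faster for video
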
# initialize Text-to-speech engine
engine = pyttsx3.init()
word = ""
#labels_dict = {0: 'A', 1: 'B', 2: 'L'}
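# (the classifier is assumed to return the letter itself, so the mapping
# above is kept only for reference)
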
while True:
    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()
    if not ret:  # stop if the camera stops delivering frames
        break

    H, W, _ = frame.shape

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  # image to draw on
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())
        for hand_landmarks in results.multi_hand_landmarks:
            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                x_.append(x)
                y_.append(y)

            for i in range(len(hand_landmarks.landmark)):
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x - min(x_))
                data_aux.append(y - min(y_))
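
        # subtracting the per-frame minima above makes each feature relative
        # to the hand's bounding box, so the classifier sees hand shape
        # rather than absolute position in the frame
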
        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10
        x2 = int(max(x_) * W) + 10
        y2 = int(max(y_) * H) + 10

        # ensure data_aux has exactly 84 features
        while len(data_aux) < 84:
            data_aux.append(0)
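        # 84 = 2 hands x 21 landmarks x 2 coordinates; zero-padding keeps the
        # feature vector length fixed when only one hand is visible, matching
        # what the classifier was trained on
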
        prediction = model.predict([np.asarray(data_aux)])
        predicted_character = str(prediction[0])

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                    cv2.LINE_AA)
        key = cv2.waitKey(1) & 0xFF  # poll the keyboard once per frame so a press is not consumed twice

        # 'w' appends the predicted letter to the word
        if key == ord('w'):
            word += predicted_character
            print(word)

        # 'e' adds a space
        if key == ord('e'):
            word += " "
            print(word)

        # 'q' appends the final letter and ends the capture loop
        if key == ord('q'):
            word += predicted_character
            print(word)
            # ser.write(word.encode())
            break
    cv2.imshow('frame', frame)
    cv2.waitKey(1)
print(word)
cap.release()
cv2.destroyAllWindows()
engine.say(word)
# play the speech
engine.runAndWait()

# send the finished word to the serial device, then close the port
ser.write(word.encode())
time.sleep(0.05)
ser.close()

# gTTS alternative:
# tts = gtts.gTTS(word)
# tts.save("hello.mp3")
# playsound("hello.mp3")