# detection.py (forked from tanjeffreyz/auto-maple)
import tensorflow as tf
import numpy as np
import cv2

#########################
#       Functions       #
#########################

def load_model():
    # Load the TensorFlow SavedModel used to detect rune arrows; judging by
    # the directory name, it was trained on color-filtered, cannied images
    model_dir = 'assets/models/rune_model_rnn_filtered_cannied/saved_model'
    model = tf.saved_model.load(model_dir)
    return model
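
# Quick sanity check (sketch, not part of the original flow): the serving
# signature lists the tensors the model returns, assuming a standard
# TF Object Detection API export.
#   fn = load_model().signatures['serving_default']
#   print(fn.structured_outputs)    # detection_boxes, detection_scores, ...
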
def canny(image):
    # Edge detection with hysteresis thresholds of 200 and 300, then convert
    # back to three channels so downstream code receives a BGR image
    image = cv2.Canny(image, 200, 300)
    colored = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    return colored
def filter_color(image):
    # Keep only saturated, bright pixels whose hue lies between 1 and 75 on
    # OpenCV's 0-179 hue scale (roughly red-orange through green), which
    # isolates the colored rune arrows from the background
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, (1, 100, 100), (75, 255, 255))

    # Mask the image
    imask = mask > 0
    arrows = np.zeros_like(image, np.uint8)
    arrows[imask] = image[imask]
    return arrows
def run_inference_for_single_image(model, image):
    image = np.asarray(image)

    # The model expects a batch of images, so add a batch axis
    input_tensor = tf.convert_to_tensor(image)
    input_tensor = input_tensor[tf.newaxis, ...]

    # Run inference through the SavedModel's default serving signature
    model_fn = model.signatures['serving_default']
    output_dict = model_fn(input_tensor)

    # All outputs are batched tensors: convert them to numpy arrays, strip the
    # batch dimension, and keep only the first num_detections entries
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
    return output_dict
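
# For reference (assuming a standard TF Object Detection API export), the
# dictionary returned above typically contains:
#   'detection_boxes'   : array of shape (N, 4), [ymin, xmin, ymax, xmax] normalized to [0, 1]
#   'detection_scores'  : array of shape (N,), sorted in descending order
#   'detection_classes' : array of shape (N,), integer label IDs
#   'num_detections'    : int
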
def sort_by_confidence(model, image):
    # Keep only detections above 50% confidence, sorted from most to least
    # confident, and return at most four (a rune has exactly four arrows)
    output_dict = run_inference_for_single_image(model, image)
    zipped = list(zip(output_dict['detection_scores'],
                      output_dict['detection_boxes'],
                      output_dict['detection_classes']))
    pruned = [t for t in zipped if t[0] > 0.5]
    pruned.sort(key=lambda x: x[0], reverse=True)
    return pruned[:4]
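
# Hypothetical debugging helper (not used by the rest of the script): draw the
# boxes returned by sort_by_confidence onto a copy of the image. The box
# coordinates are normalized, so scale them by the image dimensions first.
def draw_detections(image, detections):
    out = image.copy()
    height, width = image.shape[:2]
    for score, box, cls in detections:
        ymin, xmin, ymax, xmax = box
        top_left = (int(xmin * width), int(ymin * height))
        bottom_right = (int(xmax * width), int(ymax * height))
        cv2.rectangle(out, top_left, bottom_right, (0, 255, 0), 2)
        cv2.putText(out, f'{cls}: {score:.2f}', top_left,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return out
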
def get_boxes(image):
    # Same pruning as sort_by_confidence, but drop the scores and keep only
    # the (box, class) pairs
    pruned = sort_by_confidence(detection_model, image)
    boxes = [t[1:] for t in pruned]
    return boxes
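
# merge_detection below runs two inference passes over the isolated rune box:
# one on the upright image and one on a copy rotated 90 degrees
# counterclockwise. After that rotation, an arrow that pointed right appears
# to point up and one that pointed left appears to point down, so `converter`
# maps the rotated 'up'/'down' detections back to 'right'/'left'. Presumably
# the model separates up from down more reliably than left from right, and
# the rotated pass is used to double-check the horizontal arrows.
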
def merge_detection(image):
    label_map = {1: 'up', 2: 'down', 3: 'left', 4: 'right'}
    converter = {'up': 'right', 'down': 'left'}
    classes = []

    # Preprocessing: crop to the region of the screen where the rune prompt
    # appears, then color-filter and edge-detect it
    height, width, channels = image.shape
    cropped = image[120:height//2, width//4:3*width//4]
    filtered = filter_color(cropped)
    cannied = canny(filtered)

    # Isolate the rune box
    height, width, channels = cannied.shape
    boxes = get_boxes(cannied)
    if len(boxes) == 4:     # Only run further inferences if all four arrows have been detected
        ymins = [b[0][0] for b in boxes]
        xmins = [b[0][1] for b in boxes]
        ymaxs = [b[0][2] for b in boxes]
        xmaxs = [b[0][3] for b in boxes]
        left = int(round(min(xmins) * width))
        right = int(round(max(xmaxs) * width))
        top = int(round(min(ymins) * height))
        bottom = int(round(max(ymaxs) * height))
        rune_box = cannied[top:bottom, left:right]

        # Pad the rune box with black borders, effectively eliminating the noise around it
        height, width, channels = rune_box.shape
        pad_height, pad_width = 384, 455
        preprocessed = np.full((pad_height, pad_width, channels), (0, 0, 0), dtype=np.uint8)
        x_offset = (pad_width - width) // 2
        y_offset = (pad_height - height) // 2
        if x_offset > 0 and y_offset > 0:
            preprocessed[y_offset:y_offset+height, x_offset:x_offset+width] = rune_box

        # Run detection on the preprocessed image, reading the arrows from
        # left to right by sorting on each box's xmin
        lst = sort_by_confidence(detection_model, preprocessed)
        lst.sort(key=lambda x: x[1][1])
        classes = [label_map[item[2]] for item in lst]

        # Run detection on the rotated image, keeping only 'up' and 'down'
        # detections (classes 1 and 2). Sorting by ymax in descending order
        # recovers the original left-to-right order after the rotation.
        rotated = cv2.rotate(preprocessed, cv2.ROTATE_90_COUNTERCLOCKWISE)
        lst = sort_by_confidence(detection_model, rotated)
        lst.sort(key=lambda x: x[1][2], reverse=True)
        rotated_classes = [converter[label_map[item[2]]]
                           for item in lst
                           if item[2] in [1, 2]]

        # Merge the two detection results: trust the rotated pass for any
        # arrow the upright pass classified as horizontal
        for i in range(len(classes)):
            if rotated_classes and classes[i] in ['left', 'right']:
                classes[i] = rotated_classes.pop(0)
    return classes

#############################
#      Initialization       #
#############################

detection_model = load_model()

# Run the inference once to 'warm up' TensorFlow (the first detection triggers a long setup process)
test_image = cv2.imread('assets/inference_test_image.jpg')
merge_detection(test_image)
print('Loaded detection model')

if __name__ == '__main__':
    import mss

    monitor = {'top': 0, 'left': 0, 'width': 1366, 'height': 768}
    with mss.mss() as sct:
        while True:
            # mss returns BGRA frames: drop the alpha channel so that the
            # BGR2HSV conversion inside filter_color accepts them
            frame = cv2.cvtColor(np.array(sct.grab(monitor)), cv2.COLOR_BGRA2BGR)
            cv2.imshow('frame', canny(filter_color(frame)))
            arrows = merge_detection(frame)
            print(arrows)
            if cv2.waitKey(1) & 0xFF == 27:     # 27 is ASCII for the Esc key
                break