"""Rhythm game driven by PoseNet: a target circle is drawn over the camera feed;
touching it with a hand starts the music, and the target jumps to a new random
position on each beat for the player to follow."""
import tensorflow as tf
import numpy as np
import cv2
import time
import argparse
import math
import random
from ffpyplayer.player import MediaPlayer

import posenet

parser = argparse.ArgumentParser()
parser.add_argument('--model', type=int, default=101)
parser.add_argument('--cam_id', type=int, default=0)
parser.add_argument('--cam_width', type=int, default=1280)
parser.add_argument('--cam_height', type=int, default=720)
parser.add_argument('--scale_factor', type=float, default=0.2)
parser.add_argument('--file', type=str, default=None, help="Optionally use a video file instead of a live camera")
args = parser.parse_args()
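
# Example invocations (hypothetical file names; substitute the actual script and video names):
#   python pose_beat_game.py                   # live webcam (default camera 0)
#   python pose_beat_game.py --file dance.mp4  # run on a pre-recorded video instead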

beat_times = [2.39165533, 3.34367347, 3.83129252, 4.29569161, 5.24770975,
              5.7353288, 6.22294785, 7.17496599, 7.66258503, 8.12698413,
              9.10222222, 9.58984127, 10.05424036, 11.0062585, 11.49387755,
              11.9814966, 12.93351474, 13.42113379, 13.90875283, 14.86077098,
              15.34839002, 15.81278912, 16.78802721, 17.2524263, 17.74004535,
              18.69206349, 19.17968254, 19.66730159, 20.61931973, 21.10693878,
              21.57133787, 22.54657596, 23.01097506, 23.4985941, 24.45061224,
              24.93823129]
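
# The list above hard-codes beat onsets (in seconds), presumably for the track
# played below ('tt.mov'). The original code does not say how these values were
# produced; purely as an illustration, an offline beat tracker such as librosa
# could generate a similar list (hypothetical sketch, assumes a separate audio
# file for the track):
#
#     import librosa
#     y, sr = librosa.load('track.wav')  # hypothetical audio file
#     tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
#     beat_times = librosa.frames_to_time(beat_frames, sr=sr)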


def main():
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']

        if args.file is not None:
            cap = cv2.VideoCapture(args.file)
        else:
            cap = cv2.VideoCapture(args.cam_id)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.cam_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.cam_height)

        frame_count = 0
        intersect = False  # has the target circle been hit yet?
        c = (150, 150)     # current target circle centre
        j = 0              # index of the next beat in beat_times
        hand = 'na'        # which hand made the first hit
        while True:
            input_image, display_image, output_scale = posenet.read_cap(
                cap, scale_factor=args.scale_factor, output_stride=output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs,
                feed_dict={'image:0': input_image}
            )

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=1,
                min_pose_score=0.25)

            keypoint_coords *= output_scale

            # Skip frames where no pose was detected.
            if pose_scores[0] == 0:
                continue
            pi = np.argmax(pose_scores)

            # TODO this isn't particularly fast, use GL for drawing and display someday...
            overlay_image = posenet.draw_skel_and_kp(
                display_image, [pose_scores[pi]], keypoint_scores, keypoint_coords,
                min_pose_score=0.25, min_part_score=0.25)

            # Candidate positions for the target circle, spread over the frame.
            h, w = overlay_image.shape[:2]
            circles = [(w // 4, 3 * h // 4),
                       (3 * w // 4, 3 * h // 4),
                       (w // 4, h // 4),
                       (3 * w // 4, h // 4),
                       (w // 4, h // 2),
                       (3 * w // 4, h // 2),
                       ((w // 4) - 50, h // 4),
                       ((3 * w // 4) + 50, h // 4),
                       ((3 * w // 4) + 50, 3 * h // 4),
                       ((w // 4) - 50, 3 * h // 4)]

            # The last 10 PoseNet keypoints start at the elbows, so k[0]/k[1] are the
            # elbows and k[2]/k[3] the wrists. Extrapolating half a forearm length past
            # each wrist approximates the hand positions; draw a marker on each hand.
            k = keypoint_coords[pi][-10:]
            k = k.astype(int)
            xl = (3 * k[2, 1] - k[0, 1]) // 2
            yl = (3 * k[2, 0] - k[0, 0]) // 2
            overlay_image = cv2.circle(overlay_image, (xl, yl), 70, (150, 150, 0), thickness=10)

            xr = (3 * k[3, 1] - k[1, 1]) // 2
            yr = (3 * k[3, 0] - k[1, 0]) // 2
            overlay_image = cv2.circle(overlay_image, (xr, yr), 70, (0, 150, 255), thickness=10)

            if not intersect:
                # Waiting for the first hit: once either hand marker overlaps the
                # target, start the music and the beat clock.
                dl = math.sqrt((c[0] - xl) ** 2 + (c[1] - yl) ** 2)
                dr = math.sqrt((c[0] - xr) ** 2 + (c[1] - yr) ** 2)
                if dl < 160 or dr < 160:
                    intersect = True
                    hand = 'left' if dl < 160 else 'right'
                    player = MediaPlayer('tt.mov')  # starts playback of the backing track
                    start = time.time() + 0.5
            else:
                # Move the target to a new random position on each beat.
                #print((time.time()-start))
                if j < len(beat_times) and 0 <= (round(time.time() - start, 1) - round(beat_times[j], 1)) <= 0.1:
                    #print("yay", round((time.time()-start),1) - round((beat_times[j]),1))
                    c = random.choice(circles)
                    j += 1
                    #intersect = False
            overlay_image = cv2.circle(overlay_image, (c[0], c[1]), 90, (255, 255, 255), thickness=10)
            overlay_image = cv2.flip(overlay_image, 1)
            #cv2.putText(overlay_image, 'reaction time : ' + str(t), (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
            #cv2.putText(overlay_image, 'hand : ' + hand, (20, 55), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

            # for c in circles:
            #     dl = math.sqrt((c[0]-xl)**2 + (c[1]-yl)**2)
            #     dr = math.sqrt((c[0]-xr)**2 + (c[1]-yr)**2)

            #     if dl < 160:
            #         overlay_image = cv2.circle(overlay_image, (c[0], c[1]), 90, (150, 150, 0), thickness=10)
            #     elif dr < 160:
            #         overlay_image = cv2.circle(overlay_image, (c[0], c[1]), 90, (0, 150, 255), thickness=10)
            #     else:
            #         overlay_image = cv2.circle(overlay_image, (c[0], c[1]), 90, (0, 0, 0), thickness=10)

            cv2.imshow('posenet', overlay_image)
            frame_count += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        #print('Average FPS: ', frame_count / (time.time() - start))


if __name__ == "__main__":
    main()