get_landmarks.py
import os
import cv2
import math
import json
import numpy as np
import mediapipe as mp
# This script runs skeleton tracking over our collected image data and saves
# the world landmark coordinates (in meters) to a JSON file.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
path = '/Users/ivyhe/Desktop/pointing/data'  # change this to your image directory
# fnum = 9  # desired file number (remember to change it in the other files too)
# Body landmark indices, as defined by the MediaPipe Pose landmark model.
NOSE = 0
LEFT_EYE = 2
RIGHT_EYE = 5
LEFT_SHOULDER = 11
RIGHT_SHOULDER = 12
LEFT_ELBOW = 13
RIGHT_ELBOW = 14
LEFT_WRIST = 15
RIGHT_WRIST = 16
LEFT_HEEL = 29
RIGHT_HEEL = 30
LEFT_FOOT_INDEX = 31
RIGHT_FOOT_INDEX = 32
# Intel RealSense D435 CAMERA INTRINSICS (in pixel units)
focal_x = 190.926941
focal_y = 190.926941
res_x = 320
res_y = 240
principal_x = 159.634918
principal_y = 119.397690
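
# The intrinsics above describe a standard pinhole camera model, so a depth
# reading at pixel (u, v) can be back-projected into camera-space meters.
# This helper is an illustrative sketch and is not called by this script;
# `depth_m` is an assumed per-pixel depth value in meters from the depth stream.
def deproject_pixel(u, v, depth_m):
    x = (u - principal_x) / focal_x * depth_m
    y = (v - principal_y) / focal_y * depth_m
    return [x, y, depth_m]
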
thresh = math.pi / 18  # vector-angle threshold (10 degrees) for deciding a pointing observation
## Helper functions

# If landmark p exists, convert it to pixel (x, y) coordinates.
def check(p, w, h):
    if p:  # a missing landmark is None/empty, which is falsy
        return [round(p.x * w), round(p.y * h)]
    return None
# Calculate the normalized 2D vector from point a to point b.
def calculate_vector(a, b):
    if a and b:
        distance = [b[0] - a[0], b[1] - a[1]]
        norm = math.sqrt(distance[0] ** 2 + distance[1] ** 2)
        if norm == 0:  # a and b coincide, so no direction is defined
            return None
        return [distance[0] / norm, distance[1] / norm]
    return None
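
# A minimal sketch of how `calculate_vector` and `thresh` fit together: two
# direction vectors (e.g. eye-to-wrist and elbow-to-wrist) agree on a pointing
# observation when the angle between them falls below `thresh`. This helper is
# illustrative only and is not called by this script; it assumes both inputs
# are unit vectors from `calculate_vector`.
def is_pointing(v1, v2):
    if v1 is None or v2 is None:
        return False
    dot = v1[0] * v2[0] + v1[1] * v2[1]
    angle = math.acos(max(-1.0, min(1.0, dot)))  # clamp for float safety
    return angle < thresh

# Example: two unit vectors about 5 degrees apart.
# is_pointing([1.0, 0.0], [0.996, 0.087])  # -> True
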
data = {
    "image": [],
    "world": []
}
# map skeleton to image
# For static images:
BG_COLOR = (192, 192, 192) # gray
BOUNDARY_SEGMENTATION = False
PLOT_WORLD_LANDMARK = False
with mp_pose.Pose(
        static_image_mode=True,
        enable_segmentation=True,
        min_detection_confidence=0.5) as pose:
    for file in os.listdir(path):
        if '.png' not in file:
            continue
        print(file)
        image = cv2.imread(path + "/" + file)
        h, w, _ = image.shape
        # Convert the BGR image to RGB before processing.
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if results.pose_world_landmarks is None:
            continue  # no pose detected in this image
        lm = results.pose_world_landmarks.landmark
        nose = [lm[NOSE].x, lm[NOSE].y, lm[NOSE].z]
        left_e = [lm[LEFT_EYE].x, lm[LEFT_EYE].y, lm[LEFT_EYE].z]
        right_e = [lm[RIGHT_EYE].x, lm[RIGHT_EYE].y, lm[RIGHT_EYE].z]
        mid_e = [(left_e[0] + right_e[0]) / 2,
                 (left_e[1] + right_e[1]) / 2,
                 (left_e[2] + right_e[2]) / 2]
        left_w = [lm[LEFT_WRIST].x, lm[LEFT_WRIST].y, lm[LEFT_WRIST].z]
        right_w = [lm[RIGHT_WRIST].x, lm[RIGHT_WRIST].y, lm[RIGHT_WRIST].z]
        left_s = [lm[LEFT_SHOULDER].x, lm[LEFT_SHOULDER].y, lm[LEFT_SHOULDER].z]
        right_s = [lm[RIGHT_SHOULDER].x, lm[RIGHT_SHOULDER].y, lm[RIGHT_SHOULDER].z]
        left_elb = [lm[LEFT_ELBOW].x, lm[LEFT_ELBOW].y, lm[LEFT_ELBOW].z]
        right_elb = [lm[RIGHT_ELBOW].x, lm[RIGHT_ELBOW].y, lm[RIGHT_ELBOW].z]
        left_h = [lm[LEFT_HEEL].x, lm[LEFT_HEEL].y, lm[LEFT_HEEL].z]
        right_h = [lm[RIGHT_HEEL].x, lm[RIGHT_HEEL].y, lm[RIGHT_HEEL].z]
        left_f_idx = [lm[LEFT_FOOT_INDEX].x, lm[LEFT_FOOT_INDEX].y, lm[LEFT_FOOT_INDEX].z]
        right_f_idx = [lm[RIGHT_FOOT_INDEX].x, lm[RIGHT_FOOT_INDEX].y, lm[RIGHT_FOOT_INDEX].z]
        # Approximate the ground offset from the heel and foot-index landmarks.
        ground_offset = min(left_h[1], right_h[1], left_f_idx[1], right_f_idx[1])
        # Collect this image's landmark data for the JSON output.
        image_data = {}
        image_data["name"] = file
        image_data["world_landmark_nose"] = nose
        image_data["world_landmark_left_eye"] = left_e
        image_data["world_landmark_right_eye"] = right_e
        image_data["world_landmark_mid_eye"] = mid_e
        image_data["world_landmark_left_wrist"] = left_w
        image_data["world_landmark_right_wrist"] = right_w
        image_data["world_landmark_left_shoulder"] = left_s
        image_data["world_landmark_right_shoulder"] = right_s
        image_data["world_landmark_left_elbow"] = left_elb
        image_data["world_landmark_right_elbow"] = right_elb
        image_data["ground_offset"] = ground_offset
        # The target label is the digit just before the ".png" extension.
        image_data["target"] = int(file[-5])
        data["image"].append(image_data)
        annotated_image = image.copy()
        # Draw segmentation on the image.
        # To improve segmentation around boundaries, consider applying a joint
        # bilateral filter to "results.segmentation_mask" with "image".
        if BOUNDARY_SEGMENTATION:
            condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
            bg_image = np.zeros(image.shape, dtype=np.uint8)
            bg_image[:] = BG_COLOR
            annotated_image = np.where(condition, annotated_image, bg_image)
        # Draw pose landmarks on the image.
        mp_drawing.draw_landmarks(
            annotated_image,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
        cv2.imshow('annotated', annotated_image)
        cv2.waitKey(0)
        if BOUNDARY_SEGMENTATION:
            cv2.imwrite(path + '/seg/annotated_' + file, annotated_image)
# Write the data to a JSON file (created if it doesn't already exist).
with open(path + '.json', 'w') as json_file:
    json.dump(data, json_file, indent=4)
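
# A minimal sketch of consuming the output file, assuming the JSON written
# above; the key names match exactly what this script stores per image.
# with open(path + '.json') as f:
#     saved = json.load(f)
# for entry in saved["image"]:
#     v = calculate_vector(entry["world_landmark_right_elbow"][:2],
#                          entry["world_landmark_right_wrist"][:2])
#     print(entry["name"], entry["target"], v)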