|
1 | | -import numpy as np |
2 | | -from openvino.inference_engine import IECore |
3 | | -from scipy.optimize import linear_sum_assignment |
| 1 | +""" |
| 2 | + Copyright (C) 2020 Intel Corporation |
4 | 3 |
|
| 4 | + Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + you may not use this file except in compliance with the License. |
| 6 | + You may obtain a copy of the License at |
5 | 7 |
|
6 | | -class AssociativeEmbeddingDecoder: |
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
7 | 9 |
|
8 | | - def __init__(self, num_joints, max_num_people, detection_threshold, use_detection_val, |
9 | | - ignore_too_much, tag_threshold, adjust=True, refine=True, delta=0.0, joints_order=None): |
10 | | - self.num_joints = num_joints |
11 | | - self.max_num_people = max_num_people |
12 | | - self.detection_threshold = detection_threshold |
13 | | - self.tag_threshold = tag_threshold |
14 | | - self.use_detection_val = use_detection_val |
15 | | - self.ignore_too_much = ignore_too_much |
| 10 | + Unless required by applicable law or agreed to in writing, software |
| 11 | + distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + See the License for the specific language governing permissions and |
| 14 | + limitations under the License. |
| 15 | +""" |
16 | 16 |
|
17 | | - if self.num_joints == 17 and joints_order is None: |
18 | | - self.joint_order = (0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16) |
19 | | - else: |
20 | | - self.joint_order = list(np.arange(self.num_joints)) |
21 | | - |
22 | | - self.do_adjust = adjust |
23 | | - self.do_refine = refine |
24 | | - self.delta = delta |
25 | | - |
26 | | - def match(self, tag_k, loc_k, val_k): |
27 | | - return list(map(self._match_by_tag, zip(tag_k, loc_k, val_k))) |
28 | | - |
29 | | - def _max_match(self, scores): |
30 | | - r, c = linear_sum_assignment(scores) |
31 | | - tmp = np.stack((r, c), axis=1) |
32 | | - return tmp |
33 | | - |
34 | | - def _match_by_tag(self, inp): |
35 | | - tag_k, loc_k, val_k = inp |
36 | | - |
37 | | - embd_size = tag_k.shape[2] |
38 | | - |
39 | | - class Pose: |
40 | | - def __init__(self, num_joints, tag_size=1): |
41 | | - self.num_joints = num_joints |
42 | | - self.tag_size = tag_size |
43 | | - self.pose = np.zeros((num_joints, 2 + 1 + tag_size), dtype=np.float32) |
44 | | - self.pose_tag = np.zeros(tag_size, dtype=np.float32) |
45 | | - self.valid_points_num = 0 |
46 | | - |
47 | | - def add(self, idx, joint, tag): |
48 | | - self.pose[idx] = joint |
49 | | - self.pose_tag = (self.pose_tag * self.valid_points_num) + tag |
50 | | - self.valid_points_num += 1 |
51 | | - self.pose_tag /= self.valid_points_num |
52 | | - |
53 | | - @property |
54 | | - def tag(self): |
55 | | - if self.valid_points_num > 0: |
56 | | - return self.pose_tag |
57 | | - else: |
58 | | - return None |
59 | | - |
60 | | - all_joints = np.concatenate((loc_k, val_k[..., None], tag_k), -1) |
61 | | - |
62 | | - poses = [] |
63 | | - for idx in self.joint_order: |
64 | | - tags = tag_k[idx] |
65 | | - joints = all_joints[idx] |
66 | | - mask = joints[:, 2] > self.detection_threshold |
67 | | - tags = tags[mask] |
68 | | - joints = joints[mask] |
69 | | - |
70 | | - if joints.shape[0] == 0: |
71 | | - continue |
72 | | - |
73 | | - if len(poses) == 0: |
74 | | - for tag, joint in zip(tags, joints): |
75 | | - pose = Pose(self.num_joints, embd_size) |
76 | | - pose.add(idx, joint, tag) |
77 | | - poses.append(pose) |
78 | | - else: |
79 | | - if self.ignore_too_much and len(poses) == self.max_num_people: |
80 | | - continue |
81 | | - poses_tags = np.stack([p.tag for p in poses], axis=0) |
82 | | - |
83 | | - diff = tags[:, None] - poses_tags[None, :] |
84 | | - diff_normed = np.linalg.norm(diff, ord=2, axis=2) |
85 | | - diff_saved = np.copy(diff_normed) |
86 | | - |
87 | | - if self.use_detection_val: |
88 | | - diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3] |
89 | | - |
90 | | - num_added = diff.shape[0] |
91 | | - num_grouped = diff.shape[1] |
92 | | - |
93 | | - if num_added > num_grouped: |
94 | | - diff_normed = np.concatenate( |
95 | | - (diff_normed, |
96 | | - np.zeros((num_added, num_added - num_grouped), |
97 | | - dtype=np.float32) + 1e10), |
98 | | - axis=1) |
99 | | - |
100 | | - pairs = self._max_match(diff_normed) |
101 | | - for row, col in pairs: |
102 | | - if row < num_added and col < num_grouped and diff_saved[row][col] < self.tag_threshold: |
103 | | - poses[col].add(idx, joints[row], tags[row]) |
104 | | - else: |
105 | | - pose = Pose(self.num_joints, embd_size) |
106 | | - pose.add(idx, joints[row], tags[row]) |
107 | | - poses.append(pose) |
108 | | - |
109 | | - if len(poses): |
110 | | - ans = np.stack([p.pose for p in poses]).astype(np.float32) |
111 | | - tags = np.stack([p.tag for p in poses]).astype(np.float32) |
112 | | - else: |
113 | | - ans = np.empty((0, self.num_joints, 2 + 1 + embd_size), dtype=np.float32) |
114 | | - tags = np.empty((0, embd_size), dtype=np.float32) |
115 | | - return ans, tags |
116 | | - |
117 | | - def top_k(self, heatmaps, tags): |
118 | | - N, K, H, W = heatmaps.shape |
119 | | - heatmaps = heatmaps.reshape(N, K, -1) |
120 | | - ind = heatmaps.argpartition(-self.max_num_people, axis=2)[:, :, -self.max_num_people:] |
121 | | - val_k = np.take_along_axis(heatmaps, ind, axis=2) |
122 | | - subind = np.argsort(-val_k, axis=2) |
123 | | - ind = np.take_along_axis(ind, subind, axis=2) |
124 | | - val_k = np.take_along_axis(val_k, subind, axis=2) |
125 | | - |
126 | | - tags = tags.reshape(N, K, W * H, -1) |
127 | | - tag_k = [np.take_along_axis(tags[..., i], ind, axis=2) for i in range(tags.shape[3])] |
128 | | - tag_k = np.stack(tag_k, axis=3) |
129 | | - |
130 | | - x = ind % W |
131 | | - y = ind // W |
132 | | - ind_k = np.stack((x, y), axis=3) |
133 | | - |
134 | | - ans = {'tag_k': tag_k, 'loc_k': ind_k, 'val_k': val_k} |
135 | | - return ans |
136 | | - |
137 | | - def adjust(self, ans, heatmaps): |
138 | | - H, W = heatmaps.shape[-2:] |
139 | | - for n, people in enumerate(ans): |
140 | | - for person in people: |
141 | | - for k, joint in enumerate(person): |
142 | | - heatmap = heatmaps[n, k] |
143 | | - px = int(joint[0]) |
144 | | - py = int(joint[1]) |
145 | | - if 1 < px < W - 1 and 1 < py < H - 1: |
146 | | - diff = np.array([ |
147 | | - heatmap[py, px + 1] - heatmap[py, px - 1], |
148 | | - heatmap[py + 1, px] - heatmap[py - 1, px] |
149 | | - ]) |
150 | | - joint[:2] += np.sign(diff) * .25 |
151 | | - return ans |
152 | | - |
153 | | - def refine(self, heatmap, tag, keypoints, pose_tag=None): |
154 | | - K, H, W = heatmap.shape |
155 | | - if len(tag.shape) == 3: |
156 | | - tag = tag[..., None] |
157 | | - |
158 | | - if pose_tag is not None: |
159 | | - prev_tag = pose_tag |
160 | | - else: |
161 | | - tags = [] |
162 | | - for i in range(K): |
163 | | - if keypoints[i, 2] > 0: |
164 | | - x, y = keypoints[i][:2].astype(int) |
165 | | - tags.append(tag[i, y, x]) |
166 | | - prev_tag = np.mean(tags, axis=0) |
167 | | - |
168 | | - # Allocate the buffer for tags similarity matrix. |
169 | | - tag_copy = np.empty_like(tag[0, ..., 0]) |
170 | | - for i, (_heatmap, _tag) in enumerate(zip(heatmap, tag)): |
171 | | - if keypoints[i, 2] > 0: |
172 | | - continue |
173 | | - tag_copy[...] = _tag[..., 0] |
174 | | - diff = tag_copy |
175 | | - diff -= prev_tag |
176 | | - np.abs(diff, out=diff) |
177 | | - np.floor(diff + 0.5, out=diff) |
178 | | - diff -= _heatmap |
179 | | - idx = diff.argmin() |
180 | | - y, x = np.divmod(idx, _heatmap.shape[-1]) |
181 | | - |
182 | | - # detection score at maximum position |
183 | | - val = _heatmap[y, x] |
184 | | - |
185 | | - if val > 0: |
186 | | - keypoints[i, :3] = x, y, val |
187 | | - if 1 < x < W - 1 and 1 < y < H - 1: |
188 | | - diff = np.array([ |
189 | | - _heatmap[y, x + 1] - _heatmap[y, x - 1], |
190 | | - _heatmap[y + 1, x] - _heatmap[y - 1, x] |
191 | | - ]) |
192 | | - keypoints[i, :2] += np.sign(diff) * .25 |
193 | | - |
194 | | - return keypoints |
195 | | - |
196 | | - def __call__(self, heatmaps, tags, nms_heatmaps=None): |
197 | | - ans = self.match(**self.top_k(nms_heatmaps, tags)) |
198 | | - ans, ans_tags = map(list, zip(*ans)) |
199 | | - |
200 | | - if self.do_adjust: |
201 | | - ans = self.adjust(ans, heatmaps) |
202 | | - |
203 | | - if self.delta != 0.0: |
204 | | - for people in ans: |
205 | | - for person in people: |
206 | | - for joint in person: |
207 | | - joint[:2] += self.delta |
208 | | - |
209 | | - ans = ans[0] |
210 | | - scores = np.asarray([i[:, 2].mean() for i in ans]) |
211 | | - |
212 | | - if self.do_refine: |
213 | | - heatmap_numpy = heatmaps[0] |
214 | | - tag_numpy = tags[0] |
215 | | - for i in range(len(ans)): |
216 | | - ans[i] = self.refine(heatmap_numpy, tag_numpy, ans[i], ans_tags[0][i]) |
217 | | - |
218 | | - return ans, scores |
| 17 | +from .decoder_ae import AssociativeEmbeddingDecoder |
| 18 | +from .decoder_openpose import OpenPoseDecoder |
0 commit comments