
Commit 8aff9d6

Merge branch 'dp/hpe_update' of https://github.com/druzhkov-paul/open_model_zoo into dp/hpe_update
2 parents 5400bff + 2116b47

20 files changed: +1447 -1033 lines

demos/python_demos/human_pose_estimation_demo/README.md

Lines changed: 19 additions & 19 deletions
````diff
@@ -32,42 +32,42 @@ python3 human_pose_estimation.py -h
 ```
 The command yields the following usage message:
 ```
-usage: human_pose_estimation.py [-h] -m MODEL -i INPUT [-d DEVICE]
-                                [-t PROB_THRESHOLD] [-r]
-                                [-nireq NUM_INFER_REQUESTS]
+usage: human_pose_estimation.py [-h] -i INPUT -m MODEL --type {ae,openpose}
+                                [--tsize TSIZE] [-t PROB_THRESHOLD] [-r]
+                                [-d DEVICE] [-nireq NUM_INFER_REQUESTS]
                                 [-nstreams NUM_STREAMS]
                                 [-nthreads NUM_THREADS] [-loop LOOP]
                                 [-no_show] [-u UTILIZATION_MONITORS]

 Options:
   -h, --help            Show this help message and exit.
-  -m MODEL, --model MODEL
-                        Required. Path to an .xml file with a trained model.
   -i INPUT, --input INPUT
                         Required. Path to an image, video file or a numeric
                         camera ID.
-  -d DEVICE, --device DEVICE
-                        Optional. Specify the target device to infer on; CPU,
-                        GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
-                        will look for a suitable plugin for device specified.
-                        Default value is CPU.
+  -m MODEL, --model MODEL
+                        Required. Path to an .xml file with a trained model.
+  --type {ae,openpose}  Required. Type of the network, either "ae" for
+                        Associative Embedding or "openpose" for OpenPose.
+  --tsize TSIZE         Optional. Target input size. This demo implements
+                        image pre-processing pipeline that is common to human
+                        pose estimation approaches. Image is resize first to
+                        some target size and then the network is reshaped to
+                        fit the input image shape. By default target image
+                        size is determined based on the input shape from IR.
+                        Alternatively it can be manually set via this
+                        parameter. Note that for OpenPose-like nets image is
+                        resized to a predefined height, which is the target
+                        size in this case. For Associative Embedding-like nets
+                        target size is the length of a short image side.
   -t PROB_THRESHOLD, --prob_threshold PROB_THRESHOLD
                         Optional. Probability threshold for poses filtering.
   -r, --raw_output_message
                         Optional. Output inference results raw values showing.
-                        Required. Path to an .xml file with a trained model.
-  -i INPUT, --input INPUT
-                        Required. Path to an image, video file or a numeric
-                        camera ID.
   -d DEVICE, --device DEVICE
                         Optional. Specify the target device to infer on; CPU,
                         GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
                         will look for a suitable plugin for device specified.
                         Default value is CPU.
-  -t PROB_THRESHOLD, --prob_threshold PROB_THRESHOLD
-                        Optional. Probability threshold for poses filtering.
-  -r, --raw_output_message
-                        Optional. Output inference results raw values showing.
   -nireq NUM_INFER_REQUESTS, --num_infer_requests NUM_INFER_REQUESTS
                         Optional. Number of infer requests
   -nstreams NUM_STREAMS, --num_streams NUM_STREAMS
@@ -87,7 +87,7 @@ Options:
 ```

 Running the application with the empty list of options yields the short usage message and an error message.
-You can use the following command to do inference on GPU with a pre-trained human pose estimation model:
+You can use the following command to do inference on CPU with a pre-trained human pose estimation model:
 ```
 python3 human_pose_estimation.py -i <path_to_video>/inputVideo.mp4 -m <path_to_model>/hpe.xml -d CPU
 ```
````
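
The new `--tsize` description above amounts to a small piece of scaling logic. A minimal sketch of that logic, assuming OpenCV-style frames; the helper names here are illustrative, not taken from the demo:

```python
import cv2


def target_scale(height, width, target_size, net_type):
    # Per the help text: OpenPose-like nets are resized to a predefined
    # height; Associative Embedding-like nets fix the short image side.
    if net_type == 'openpose':
        return target_size / height
    return target_size / min(height, width)


def preprocess(frame, target_size, net_type):
    h, w = frame.shape[:2]
    scale = target_scale(h, w, target_size, net_type)
    resized = cv2.resize(frame, None, fx=scale, fy=scale)
    # The demo then reshapes the network to the resized image's shape;
    # that step is omitted here.
    return resized
```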

demos/python_demos/human_pose_estimation_demo/human_pose_estimation.py

Lines changed: 20 additions & 11 deletions
```diff
@@ -42,18 +42,29 @@ def build_argparser():
     parser = ArgumentParser(add_help=False)
     args = parser.add_argument_group('Options')
     args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
-    args.add_argument('-m', '--model', help='Required. Path to an .xml file with a trained model.',
-                      required=True, type=str)
     args.add_argument('-i', '--input', help='Required. Path to an image, video file or a numeric camera ID.',
                       required=True, type=str)
+    args.add_argument('-m', '--model', help='Required. Path to an .xml file with a trained model.',
+                      required=True, type=str)
+    args.add_argument('--type', choices=('ae', 'openpose'), required=True, type=str,
+                      help='Required. Type of the network, either "ae" for Associative Embedding '
+                           'or "openpose" for OpenPose.')
+    args.add_argument('--tsize', default=None, type=int,
+                      help='Optional. Target input size. This demo implements image pre-processing pipeline '
+                           'that is common to human pose estimation approaches. Image is resize first to some '
+                           'target size and then the network is reshaped to fit the input image shape. '
+                           'By default target image size is determined based on the input shape from IR. '
+                           'Alternatively it can be manually set via this parameter. Note that for OpenPose-like '
+                           'nets image is resized to a predefined height, which is the target size in this case. '
+                           'For Associative Embedding-like nets target size is the length of a short image side.')
+    args.add_argument('-t', '--prob_threshold', help='Optional. Probability threshold for poses filtering.',
+                      default=0.1, type=float)
+    args.add_argument('-r', '--raw_output_message', help='Optional. Output inference results raw values showing.',
+                      default=False, action='store_true')
     args.add_argument('-d', '--device',
                       help='Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is '
                            'acceptable. The sample will look for a suitable plugin for device specified. '
                            'Default value is CPU.', default='CPU', type=str)
-    args.add_argument('-t', '--prob_threshold', help='Optional. Probability threshold for poses filtering.',
-                      default=0.5, type=float)
-    args.add_argument('-r', '--raw_output_message', help='Optional. Output inference results raw values showing.',
-                      default=False, action='store_true')
     args.add_argument('-nireq', '--num_infer_requests', help='Optional. Number of infer requests',
                       default=1, type=int)
     args.add_argument('-nstreams', '--num_streams',
@@ -68,8 +79,6 @@ def build_argparser():
     args.add_argument('-no_show', '--no_show', help="Optional. Don't show output", action='store_true')
     args.add_argument('-u', '--utilization_monitors', default='', type=str,
                       help='Optional. List of monitors to show initially.')
-
-    args.add_argument('--type', default='ae', choices=('ae', 'openpose'), type=str, help='Optional.')
     return parser


```
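
A quick sanity check of the reworked parser above; this assumes `build_argparser` is importable from the demo script and uses placeholder paths:

```python
from human_pose_estimation import build_argparser  # assumed import path

args = build_argparser().parse_args(
    ['-i', 'input.mp4', '-m', 'hpe.xml', '--type', 'ae'])
assert args.type == 'ae'           # --type is now required
assert args.tsize is None          # --tsize stays optional
assert args.prob_threshold == 0.1  # new default, lowered from 0.5
```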

```diff
@@ -140,12 +149,12 @@ def main():

     hpes = {
         Modes.USER_SPECIFIED:
-            HPE(ie, args.model, device=args.device, plugin_config=config_user_specified,
+            HPE(ie, args.model, target_size=args.tsize, device=args.device, plugin_config=config_user_specified,
                 results=completed_request_results, max_num_requests=args.num_infer_requests,
                 caught_exceptions=exceptions),
         Modes.MIN_LATENCY:
-            HPE(ie, args.model, device=args.device.split(':')[-1].split(',')[0], plugin_config=config_min_latency,
-                results=completed_request_results, max_num_requests=1,
+            HPE(ie, args.model, target_size=args.tsize, device=args.device.split(':')[-1].split(',')[0],
+                plugin_config=config_min_latency, results=completed_request_results, max_num_requests=1,
                 caught_exceptions=exceptions)
     }
```
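
The MIN_LATENCY branch above reduces a possibly compound device string to a single device. A small illustration of what `args.device.split(':')[-1].split(',')[0]` evaluates to (the example strings are hypothetical inputs):

```python
def first_device(device):
    # Strip a "MULTI:"/"HETERO:" prefix, then take the first listed device.
    return device.split(':')[-1].split(',')[0]

assert first_device('CPU') == 'CPU'
assert first_device('MULTI:CPU,GPU') == 'CPU'
assert first_device('HETERO:GPU,CPU') == 'GPU'
```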

Lines changed: 14 additions & 214 deletions
```diff
@@ -1,218 +1,18 @@
-import numpy as np
-from openvino.inference_engine import IECore
-from scipy.optimize import linear_sum_assignment
+"""
+Copyright (C) 2020 Intel Corporation

+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at

-class AssociativeEmbeddingDecoder:
+    http://www.apache.org/licenses/LICENSE-2.0

-    def __init__(self, num_joints, max_num_people, detection_threshold, use_detection_val,
-                 ignore_too_much, tag_threshold, adjust=True, refine=True, delta=0.0, joints_order=None):
-        self.num_joints = num_joints
-        self.max_num_people = max_num_people
-        self.detection_threshold = detection_threshold
-        self.tag_threshold = tag_threshold
-        self.use_detection_val = use_detection_val
-        self.ignore_too_much = ignore_too_much
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""

-        if self.num_joints == 17 and joints_order is None:
-            self.joint_order = (0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16)
-        else:
-            self.joint_order = list(np.arange(self.num_joints))
-
-        self.do_adjust = adjust
-        self.do_refine = refine
-        self.delta = delta
-
-    def match(self, tag_k, loc_k, val_k):
-        return list(map(self._match_by_tag, zip(tag_k, loc_k, val_k)))
-
-    def _max_match(self, scores):
-        r, c = linear_sum_assignment(scores)
-        tmp = np.stack((r, c), axis=1)
-        return tmp
-
-    def _match_by_tag(self, inp):
-        tag_k, loc_k, val_k = inp
-
-        embd_size = tag_k.shape[2]
-
-        class Pose:
-            def __init__(self, num_joints, tag_size=1):
-                self.num_joints = num_joints
-                self.tag_size = tag_size
-                self.pose = np.zeros((num_joints, 2 + 1 + tag_size), dtype=np.float32)
-                self.pose_tag = np.zeros(tag_size, dtype=np.float32)
-                self.valid_points_num = 0
-
-            def add(self, idx, joint, tag):
-                self.pose[idx] = joint
-                self.pose_tag = (self.pose_tag * self.valid_points_num) + tag
-                self.valid_points_num += 1
-                self.pose_tag /= self.valid_points_num
-
-            @property
-            def tag(self):
-                if self.valid_points_num > 0:
-                    return self.pose_tag
-                else:
-                    return None
-
-        all_joints = np.concatenate((loc_k, val_k[..., None], tag_k), -1)
-
-        poses = []
-        for idx in self.joint_order:
-            tags = tag_k[idx]
-            joints = all_joints[idx]
-            mask = joints[:, 2] > self.detection_threshold
-            tags = tags[mask]
-            joints = joints[mask]
-
-            if joints.shape[0] == 0:
-                continue
-
-            if len(poses) == 0:
-                for tag, joint in zip(tags, joints):
-                    pose = Pose(self.num_joints, embd_size)
-                    pose.add(idx, joint, tag)
-                    poses.append(pose)
-            else:
-                if self.ignore_too_much and len(poses) == self.max_num_people:
-                    continue
-                poses_tags = np.stack([p.tag for p in poses], axis=0)
-
-                diff = tags[:, None] - poses_tags[None, :]
-                diff_normed = np.linalg.norm(diff, ord=2, axis=2)
-                diff_saved = np.copy(diff_normed)
-
-                if self.use_detection_val:
-                    diff_normed = np.round(diff_normed) * 100 - joints[:, 2:3]
-
-                num_added = diff.shape[0]
-                num_grouped = diff.shape[1]
-
-                if num_added > num_grouped:
-                    diff_normed = np.concatenate(
-                        (diff_normed,
-                         np.zeros((num_added, num_added - num_grouped),
-                                  dtype=np.float32) + 1e10),
-                        axis=1)
-
-                pairs = self._max_match(diff_normed)
-                for row, col in pairs:
-                    if row < num_added and col < num_grouped and diff_saved[row][col] < self.tag_threshold:
-                        poses[col].add(idx, joints[row], tags[row])
-                    else:
-                        pose = Pose(self.num_joints, embd_size)
-                        pose.add(idx, joints[row], tags[row])
-                        poses.append(pose)
-
-        if len(poses):
-            ans = np.stack([p.pose for p in poses]).astype(np.float32)
-            tags = np.stack([p.tag for p in poses]).astype(np.float32)
-        else:
-            ans = np.empty((0, self.num_joints, 2 + 1 + embd_size), dtype=np.float32)
-            tags = np.empty((0, embd_size), dtype=np.float32)
-        return ans, tags
-
-    def top_k(self, heatmaps, tags):
-        N, K, H, W = heatmaps.shape
-        heatmaps = heatmaps.reshape(N, K, -1)
-        ind = heatmaps.argpartition(-self.max_num_people, axis=2)[:, :, -self.max_num_people:]
-        val_k = np.take_along_axis(heatmaps, ind, axis=2)
-        subind = np.argsort(-val_k, axis=2)
-        ind = np.take_along_axis(ind, subind, axis=2)
-        val_k = np.take_along_axis(val_k, subind, axis=2)
-
-        tags = tags.reshape(N, K, W * H, -1)
-        tag_k = [np.take_along_axis(tags[..., i], ind, axis=2) for i in range(tags.shape[3])]
-        tag_k = np.stack(tag_k, axis=3)
-
-        x = ind % W
-        y = ind // W
-        ind_k = np.stack((x, y), axis=3)
-
-        ans = {'tag_k': tag_k, 'loc_k': ind_k, 'val_k': val_k}
-        return ans
-
-    def adjust(self, ans, heatmaps):
-        H, W = heatmaps.shape[-2:]
-        for n, people in enumerate(ans):
-            for person in people:
-                for k, joint in enumerate(person):
-                    heatmap = heatmaps[n, k]
-                    px = int(joint[0])
-                    py = int(joint[1])
-                    if 1 < px < W - 1 and 1 < py < H - 1:
-                        diff = np.array([
-                            heatmap[py, px + 1] - heatmap[py, px - 1],
-                            heatmap[py + 1, px] - heatmap[py - 1, px]
-                        ])
-                        joint[:2] += np.sign(diff) * .25
-        return ans
-
-    def refine(self, heatmap, tag, keypoints, pose_tag=None):
-        K, H, W = heatmap.shape
-        if len(tag.shape) == 3:
-            tag = tag[..., None]
-
-        if pose_tag is not None:
-            prev_tag = pose_tag
-        else:
-            tags = []
-            for i in range(K):
-                if keypoints[i, 2] > 0:
-                    x, y = keypoints[i][:2].astype(int)
-                    tags.append(tag[i, y, x])
-            prev_tag = np.mean(tags, axis=0)
-
-        # Allocate the buffer for tags similarity matrix.
-        tag_copy = np.empty_like(tag[0, ..., 0])
-        for i, (_heatmap, _tag) in enumerate(zip(heatmap, tag)):
-            if keypoints[i, 2] > 0:
-                continue
-            tag_copy[...] = _tag[..., 0]
-            diff = tag_copy
-            diff -= prev_tag
-            np.abs(diff, out=diff)
-            np.floor(diff + 0.5, out=diff)
-            diff -= _heatmap
-            idx = diff.argmin()
-            y, x = np.divmod(idx, _heatmap.shape[-1])
-
-            # detection score at maximum position
-            val = _heatmap[y, x]
-
-            if val > 0:
-                keypoints[i, :3] = x, y, val
-                if 1 < x < W - 1 and 1 < y < H - 1:
-                    diff = np.array([
-                        _heatmap[y, x + 1] - _heatmap[y, x - 1],
-                        _heatmap[y + 1, x] - _heatmap[y - 1, x]
-                    ])
-                    keypoints[i, :2] += np.sign(diff) * .25
-
-        return keypoints
-
-    def __call__(self, heatmaps, tags, nms_heatmaps=None):
-        ans = self.match(**self.top_k(nms_heatmaps, tags))
-        ans, ans_tags = map(list, zip(*ans))
-
-        if self.do_adjust:
-            ans = self.adjust(ans, heatmaps)
-
-        if self.delta != 0.0:
-            for people in ans:
-                for person in people:
-                    for joint in person:
-                        joint[:2] += self.delta
-
-        ans = ans[0]
-        scores = np.asarray([i[:, 2].mean() for i in ans])
-
-        if self.do_refine:
-            heatmap_numpy = heatmaps[0]
-            tag_numpy = tags[0]
-            for i in range(len(ans)):
-                ans[i] = self.refine(heatmap_numpy, tag_numpy, ans[i], ans_tags[0][i])
-
-        return ans, scores
+from .decoder_ae import AssociativeEmbeddingDecoder
+from .decoder_openpose import OpenPoseDecoder
```
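
The decoder itself is unchanged by this commit, only relocated to `decoder_ae.py`. For orientation, a minimal driving sketch inferred from the removed code's signatures; the flat import path, shapes, and random inputs are illustrative only (the demo feeds NMS-suppressed heatmaps rather than reusing the raw ones):

```python
import numpy as np
from decoder_ae import AssociativeEmbeddingDecoder  # assumed import path

decoder = AssociativeEmbeddingDecoder(
    num_joints=17, max_num_people=30, detection_threshold=0.1,
    use_detection_val=True, ignore_too_much=False, tag_threshold=1.0)

# Shapes follow top_k(): batch N=1, K=17 keypoints, H x W heatmaps;
# tags carries one embedding channel per keypoint.
heatmaps = np.random.rand(1, 17, 128, 128).astype(np.float32)
tags = np.random.rand(1, 17, 128, 128).astype(np.float32)

poses, scores = decoder(heatmaps, tags, nms_heatmaps=heatmaps)
print(poses.shape)   # (num_people, 17, 4): x, y, score, tag
print(scores.shape)  # (num_people,)
```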
