diff --git a/libs/common/img_proc.py b/libs/common/img_proc.py index 4cab34c..6e5d683 100644 --- a/libs/common/img_proc.py +++ b/libs/common/img_proc.py @@ -40,8 +40,7 @@ def get_affine_transform(center, center = np.array(center) scale_tmp = scale * SIZE src_w = scale_tmp[0] - dst_w = output_size[0] - dst_h = output_size[1] + dst_h, dst_w = output_size rot_rad = np.pi * rot / 180 src_dir = get_dir([0, src_w * -0.5], rot_rad) @@ -215,7 +214,7 @@ def crop_single_instance(data_numpy, bbox, joints, parameters, pth_trans=None): """ Crop an instance from an image given the bounding box and part coordinates. """ - reso = parameters['input_size'] + reso = parameters['input_size'] # (height, width) transformed_joints = joints.copy() if parameters['jitter_bbox']: bbox, joints = jitter_bbox_with_kpts_no_occlu(bbox, @@ -232,7 +231,7 @@ def crop_single_instance(data_numpy, bbox, joints, parameters, pth_trans=None): trans = get_affine_transform(c, s, 0.0, reso) input = cv2.warpAffine(data_numpy, trans, - (int(reso[0]), int(reso[1])), + (int(reso[1]), int(reso[0])), flags=cv2.INTER_LINEAR ) # add two more channels to encode object location @@ -366,7 +365,7 @@ def generate_target(joints, joints_vis, parameters): assert target_type == 'gaussian', 'Only support gaussian map now!' if target_type == 'gaussian': - target = np.zeros((num_joints, heatmap_size[1], heatmap_size[0]), + target = np.zeros((num_joints, heatmap_size[0], heatmap_size[1]), dtype=np.float32) tmp_size = sigma * 3 @@ -380,7 +379,7 @@ def generate_target(joints, joints_vis, parameters): # Check that any part of the gaussian is in-bounds ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] - if ul[0] >= heatmap_size[0] or ul[1] >= heatmap_size[1] \ + if ul[0] >= heatmap_size[1] or ul[1] >= heatmap_size[0] \ or br[0] < 0 or br[1] < 0: # If not, just return the image as is target_weight[joint_id] = 0 @@ -395,11 +394,11 @@ def generate_target(joints, joints_vis, parameters): g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) # Usable gaussian range - g_x = max(0, -ul[0]), min(br[0], heatmap_size[0]) - ul[0] - g_y = max(0, -ul[1]), min(br[1], heatmap_size[1]) - ul[1] + g_x = max(0, -ul[0]), min(br[0], heatmap_size[1]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], heatmap_size[0]) - ul[1] # Image range - img_x = max(0, ul[0]), min(br[0], heatmap_size[0]) - img_y = max(0, ul[1]), min(br[1], heatmap_size[1]) + img_x = max(0, ul[0]), min(br[0], heatmap_size[1]) + img_y = max(0, ul[1]), min(br[1], heatmap_size[0]) target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ g[g_y[0]:g_y[1], g_x[0]:g_x[1]] diff --git a/libs/dataset/KITTI/car_instance.py b/libs/dataset/KITTI/car_instance.py index 0e65f6b..1d118ec 100644 --- a/libs/dataset/KITTI/car_instance.py +++ b/libs/dataset/KITTI/car_instance.py @@ -496,11 +496,13 @@ def _get_data_parameters(self, cfgs): # parameters relevant to heatmap regression model and image data augmentation if 'heatmapModel' in cfgs: hm = cfgs['heatmapModel'] + jitter_flag = hm['jitter_bbox'] and self.split=='train' and cfgs['train'] self.hm_para = {'reference': 'bbox', 'resize': True, 'add_xy': hm['add_xy'], - 'jitter_bbox': hm['jitter_bbox'] and self.split=='train', + 'jitter_bbox': jitter_flag, 'jitter_params': hm['jitter_params'], + # (height, width) 'input_size': np.array([hm['input_size'][1], hm['input_size'][0]]), 'heatmap_size': np.array([hm['heatmap_size'][1], diff --git a/libs/loss/function.py b/libs/loss/function.py index e8233fa..8080a2a 100644 --- a/libs/loss/function.py +++ b/libs/loss/function.py @@ -162,8 +162,8 @@ def calc_coor_loss(self, coordinates_pred, coordinates_gt): coordinates_pred: [N, K, 2] coordinates_gt: [N, K, 2] """ - coordinates_gt[:, :, 0] /= self.img_size[1] - coordinates_gt[:, :, 1] /= self.img_size[0] + coordinates_gt[:, :, 0] /= self.img_size[0] + coordinates_gt[:, :, 1] /= self.img_size[1] loss = self.comp_dict['coor'][0](coordinates_pred, coordinates_gt) return loss diff --git a/libs/metric/criterions.py b/libs/metric/criterions.py index a7c8c97..b7417db 100644 --- a/libs/metric/criterions.py +++ b/libs/metric/criterions.py @@ -91,6 +91,7 @@ def get_distance_src(output, else: raise NotImplementedError image_size = image_size if cfgs is None else cfgs['heatmapModel']['input_size'] + width, height = image_size # multiply by down-sample ratio if not isinstance(pred, np.ndarray): pred = pred.data.cpu().numpy() @@ -98,7 +99,7 @@ def get_distance_src(output, max_vals = max_vals.data.cpu().numpy() # the coordinates need to be rescaled for different cases if type(output) is tuple: - pred *= image_size[0] + pred *= np.array(image_size).reshape(1, 1, 2) else: pred *= image_size[0] / output.shape[3] # inverse transform and compare pixel didstance @@ -120,7 +121,7 @@ def get_distance_src(output, trans_inv = lip.get_affine_transform(centers[sample_idx], scales[sample_idx], rots[sample_idx], - image_size, + (height, width), inv=1 ) joints_original = joints_original_batch[sample_idx] diff --git a/libs/model/heatmapModel/hrnet.py b/libs/model/heatmapModel/hrnet.py index b13cba7..e296dbd 100644 --- a/libs/model/heatmapModel/hrnet.py +++ b/libs/model/heatmapModel/hrnet.py @@ -422,6 +422,8 @@ def __init__(self, cfgs, **kwargs): ) elif self.head_type == 'coordinates': num_chan = self.num_joints + map_width, map_height = cfgs['heatmapModel']['heatmap_size'] + ks = (int(map_height / 16), int(map_width / 16)) self.head1 = nn.Sequential( nn.Conv2d( in_channels=pre_stage_channels[0], @@ -432,7 +434,6 @@ def __init__(self, cfgs, **kwargs): ), ) self.head2 = nn.Sequential( - # produce 8*8*num_joints tensor BasicBlock(num_chan+2, num_chan*2, stride=2, @@ -453,11 +454,10 @@ def __init__(self, cfgs, **kwargs): stride=2, downsample=basicdownsample(num_chan*2, num_chan*2) ), - nn.Conv2d(num_chan*2, num_chan*2, kernel_size=4), + nn.Conv2d(num_chan*2, num_chan*2, kernel_size=ks), nn.Sigmoid() ) # coordinate convolution makes arg-max easier - map_height, map_width = cfgs['heatmapModel']['heatmap_size'] x_map = np.tile(np.linspace(0, 1, map_width), (map_height, 1)) x_map = x_map.reshape(1, 1, map_height, map_width) y_map = np.linspace(0, 1, map_height).reshape(map_height, 1) @@ -641,6 +641,7 @@ def init_weights(self, pretrained=''): or self.pretrained_layers[0] == '*': need_init_state_dict[name] = m self.load_state_dict(need_init_state_dict, strict=False) + logger.info('{:d} modules initialized.'.format(len(need_init_state_dict))) elif pretrained: logger.error('=> please download pre-trained models first!') raise ValueError('{} does not exist!'.format(pretrained)) diff --git a/libs/trainer/trainer.py b/libs/trainer/trainer.py index a5edc33..e76720c 100644 --- a/libs/trainer/trainer.py +++ b/libs/trainer/trainer.py @@ -452,13 +452,14 @@ def apply_dropout(m): cfgs.get('exp_type') == 'instanceto2d': joints_pred = prediction[1].data.cpu().numpy() image_size = cfgs['heatmapModel']['input_size'] + joints_pred *= np.array(image_size).reshape(1, 1, 2) save_debug_images(0, batch_idx, cfgs, data, meta, target, - {'joints_pred': joints_pred * image_size[0]}, + {'joints_pred': joints_pred}, prediction, 'validation' ) diff --git a/libs/visualization/debug.py b/libs/visualization/debug.py index 3f91df4..6e9deaf 100644 --- a/libs/visualization/debug.py +++ b/libs/visualization/debug.py @@ -34,13 +34,13 @@ def draw_circles(ndarr, break joints = batch_joints[k] for idx, joint in enumerate(joints): - joint[0] = x * width + padding + joint[0] - joint[1] = y * height + padding + joint[1] - cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, color, 2) + xpos = x * width + padding + joint[0] + ypos = y * height + padding + joint[1] + cv2.circle(ndarr, (int(xpos), int(ypos)), 2, color, 2) if add_idx: cv2.putText(ndarr, str(idx+1), - (int(joint[0]), int(joint[1])), + (int(xpos), int(ypos)), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 1 ) diff --git a/tools/train_IGRs.py b/tools/train_IGRs.py index 9706618..92d10ff 100644 --- a/tools/train_IGRs.py +++ b/tools/train_IGRs.py @@ -121,7 +121,7 @@ def evaluate(model, model_settings, GPUs, cfgs, logger, final_output_dir, eval_t trainer.evaluate(valid_dataset, model, loss_func, cfgs, logger, evaluator, collate_fn=collate_fn) if eval_train: logger.info("Evaluation on the training split:") - trainer.evaluate(train_dataset, model, loss_func, cfgs, logger, evaluator) + trainer.evaluate(train_dataset, model, loss_func, cfgs, logger, evaluator, collate_fn=collate_fn) return def main():