Skip to content


instance segmentation task is added.
Browse files Browse the repository at this point in the history
  • Loading branch information
nerminsamet committed Apr 13, 2021
1 parent 2436441 commit 95f7d36
Show file tree
Hide file tree
Showing 11 changed files with 759 additions and 7 deletions.
151 changes: 151 additions & 0 deletions src/lib/datasets/dataset/coco_seg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os

import numpy as np
import pycocotools.coco as coco
import torch.utils.data as data
from pycocotools.cocoeval import COCOeval

class COCOSEG(data.Dataset):
    """COCO 2017 dataset description for the instance segmentation task.

    Holds the category tables and augmentation constants, loads the
    annotation file of the requested split through pycocotools, and converts
    per-image results into the COCO results format for box and mask
    evaluation.
    """

    num_classes = 80
    default_resolution = [512, 512]
    # Per-channel statistics, shaped (1, 1, 3).
    mean = np.array([0.40789654, 0.44719302, 0.47026115],
                    dtype=np.float32).reshape(1, 1, 3)
    std = np.array([0.28863828, 0.27408164, 0.27809835],
                   dtype=np.float32).reshape(1, 1, 3)

    def __init__(self, opt, split):
        """Build paths and category tables, then load the annotations.

        Args:
            opt: Options object. ``data_dir``, ``coco_dir`` and ``task`` are
                read here and the object is stored as ``self.opt``.
            split: Split name used to build the image directory and the
                annotation file name (``'test'`` selects the test-dev file).
        """
        super(COCOSEG, self).__init__()
        self.data_dir = os.path.join(opt.data_dir, opt.coco_dir)
        self.img_dir = os.path.join(
            self.data_dir + '/images', '{}2017'.format(split))

        # NOTE(review): annotation file names follow the standard COCO 2017 /
        # CenterNet layout -- confirm against the files actually on disk.
        if split == 'test':
            annot_name = 'image_info_test-dev2017.json'
        elif opt.task == 'exdet':
            annot_name = 'instances_extreme_{}2017.json'
        else:
            annot_name = 'instances_{}2017.json'
        # Format only the file name so braces in data_dir cannot break it.
        self.annot_path = os.path.join(
            self.data_dir, 'annotations', annot_name.format(split))

        self.max_objs = 70
        self.class_name = [
            '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
            'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
            'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
            'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
            'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
            'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
            'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
            'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
            'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush']
        # COCO category ids are not contiguous; this lists the 80 used ones.
        self._valid_ids = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
            14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
            72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
            82, 84, 85, 86, 87, 88, 89, 90]
        # COCO category id -> zero-based contiguous class index.
        self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
        self.voc_color = [
            (v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
            for v in range(1, self.num_classes + 1)]
        # Fixed-seed generator plus eigen decomposition constants; presumably
        # consumed by the colour augmentation of the sampling class.
        self._data_rng = np.random.RandomState(123)
        self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
                                 dtype=np.float32)
        self._eig_vec = np.array([
            [-0.58752847, -0.69563484, 0.41340352],
            [-0.5832747, 0.00994535, -0.81221408],
            [-0.56089297, 0.71832671, 0.41158938]
        ], dtype=np.float32)

        self.split = split
        self.opt = opt

        print('==> initializing coco 2017 {} data.'.format(split))
        self.coco = coco.COCO(self.annot_path)
        self.images = self.coco.getImgIds()
        self.num_samples = len(self.images)

        print('Loaded {} {} samples'.format(split, self.num_samples))

    def _to_float(self, x):
        """Return ``x`` rounded to two decimals as a Python float."""
        return float("{:.2f}".format(x))

    def _box_detection(self, image_id, category_id, bbox):
        """Build the box part of one result entry without mutating ``bbox``.

        ``bbox`` holds ``x1, y1, x2, y2, score`` in its first five slots; the
        emitted ``"bbox"`` is ``[x, y, width, height]``.
        """
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
        return {
            "image_id": int(image_id),
            "category_id": int(category_id),
            "bbox": [self._to_float(v) for v in (x1, y1, x2 - x1, y2 - y1)],
            "score": self._to_float(bbox[4]),
        }

    def convert_eval_format(self, all_bboxes):
        """Flatten per-image, per-class results into a list of result dicts.

        Args:
            all_bboxes: ``{image_id: {cls_ind: value}}`` where ``cls_ind`` is
                a 1-based class index. ``value`` is either a dict with
                parallel ``'boxs'`` and ``'pred_mask'`` sequences, or a plain
                sequence of boxes. Boxes with more than five entries
                contribute entries 5..12 as ``"extreme_points"``.

        Returns:
            A list of dicts with ``image_id``, ``category_id``, ``bbox``,
            ``score`` and, where available, ``segmentation`` or
            ``extreme_points``.

        The input is left untouched, so the same results can be converted
        more than once (for example by ``save_results`` and ``run_eval``).
        """
        detections = []
        for image_id, per_class in all_bboxes.items():
            for cls_ind, entry in per_class.items():
                category_id = self._valid_ids[cls_ind - 1]
                if isinstance(entry, dict):
                    masks = entry['pred_mask']
                    for idx, bbox in enumerate(entry['boxs']):
                        detection = self._box_detection(
                            image_id, category_id, bbox)
                        detection["segmentation"] = masks[idx]
                        detections.append(detection)
                else:
                    for bbox in entry:
                        detection = self._box_detection(
                            image_id, category_id, bbox)
                        if len(bbox) > 5:
                            detection["extreme_points"] = list(
                                map(self._to_float, bbox[5:13]))
                        detections.append(detection)
        return detections

    def __len__(self):
        """Return the number of images in the loaded split."""
        return self.num_samples

    def save_results(self, results, save_dir):
        """Write the converted results to ``<save_dir>/results.json``."""
        with open('{}/results.json'.format(save_dir), 'w') as out_file:
            json.dump(self.convert_eval_format(results), out_file)

    def run_eval(self, results, save_dir):
        """Run COCO box and mask evaluation and print both summaries.

        Args:
            results: Same structure as accepted by ``convert_eval_format``.
            save_dir: Unused here; kept for interface compatibility.
        """
        detections = self.convert_eval_format(results)
        coco_dets = self.coco.loadRes(detections)
        for iou_type in ("bbox", "segm"):
            coco_eval = COCOeval(self.coco, coco_dets, iou_type)
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
8 changes: 6 additions & 2 deletions src/lib/datasets/dataset_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,29 @@
from .sample.exdet import EXDetDataset
from .sample.ctdet import CTDetDataset
from .sample.multi_pose import MultiPoseDataset
from .sample.ctseg import CTSegDataset

from src.lib.datasets.dataset.coco import COCO
from src.lib.datasets.dataset.pascal import PascalVOC
from src.lib.datasets.dataset.kitti import KITTI
from src.lib.datasets.dataset.coco_hp import COCOHP
from src.lib.datasets.dataset.coco_seg import COCOSEG

# Maps a dataset name to the class that describes that dataset.
dataset_factory = {
    'coco': COCO,
    'pascal': PascalVOC,
    'kitti': KITTI,
    'coco_hp': COCOHP,
    'coco_seg': COCOSEG,
}

# Maps a task name to the class that builds training samples for that task.
_sample_factory = {
    'exdet': EXDetDataset,
    'ctdet': CTDetDataset,
    'ddd': DddDataset,
    'multi_pose': MultiPoseDataset,
    'ctseg': CTSegDataset,
}

Expand Down
159 changes: 159 additions & 0 deletions src/lib/datasets/sample/ctseg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import math
import os

import cv2
import numpy as np
import torch
import torch.utils.data as data

from src.lib.utils.image import flip, color_aug
from src.lib.utils.image import get_affine_transform, affine_transform
from src.lib.utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from src.lib.utils.image import draw_dense_reg

class CTSegDataset(data.Dataset):
    """Sample builder for center-point based instance segmentation.

    ``__getitem__`` reads attributes that are not set in this class
    (``images``, ``coco``, ``img_dir``, ``max_objs``, ``opt``, ``split``,
    ``num_classes``, ``cat_ids``, ``mean``, ``std``, ``_data_rng``,
    ``_eig_val``, ``_eig_vec``). NOTE(review): presumably they come from a
    dataset class combined with this one -- confirm against the factory.
    """

    def _coco_box_to_bbox(self, box):
        """Convert a ``[x, y, width, height]`` box to ``[x1, y1, x2, y2]``.

        Returns:
            A float32 numpy array with the four corner coordinates.
        """
        bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
                        dtype=np.float32)
        return bbox

    def _get_border(self, border, size):
        """Halve ``border`` until ``size - border`` exceeds ``border``.

        Used to pick a margin for random crop centers that still leaves a
        non-empty range inside an image side of length ``size``.
        """
        i = 1
        while size - border // i <= border // i:
            i *= 2
        return border // i

    def __getitem__(self, index):
        """Load one image and build its network input and training targets.

        Args:
            index: Position in ``self.images``.

        Returns:
            A dict with ``'input'``, ``'hm'``, ``'reg_mask'``, ``'ind'``,
            ``'wh'``, ``'instance_mask'``, ``'gtboxes'`` and
            ``'cat_spec_mask'``. Depending on the options it also carries
            ``'dense_wh'``/``'dense_wh_mask'`` or ``'cat_spec_wh'`` (both
            replace ``'wh'``), ``'reg'``, and ``'meta'``.

        Raises:
            IOError: If the image file cannot be read.
        """
        img_id = self.images[index]
        file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
        img_path = os.path.join(self.img_dir, file_name)
        ann_ids = self.coco.getAnnIds(imgIds=[img_id])
        anns = self.coco.loadAnns(ids=ann_ids)
        # Targets have a fixed number of slots; extra annotations are dropped.
        num_objs = min(len(anns), self.max_objs)

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising on a bad path.
            raise IOError('Could not read image: {}'.format(img_path))

        height, width = img.shape[0], img.shape[1]
        c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
        if self.opt.keep_res:
            input_h = (height | self.opt.pad) + 1
            input_w = (width | self.opt.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
        else:
            s = max(img.shape[0], img.shape[1]) * 1.0
            input_h, input_w = self.opt.input_h, self.opt.input_w

        flipped = False
        if self.split == 'train':
            if not self.opt.not_rand_crop:
                # Random scale and a random center kept away from the border.
                s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
                w_border = self._get_border(128, img.shape[1])
                h_border = self._get_border(128, img.shape[0])
                c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
                c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
            else:
                # Clipped Gaussian jitter of center and scale.
                sf = self.opt.scale
                cf = self.opt.shift
                c[0] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                c[1] += s * np.clip(np.random.randn() * cf, -2 * cf, 2 * cf)
                s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            if np.random.random() < self.opt.flip:
                flipped = True
                img = img[:, ::-1, :]
                c[0] = width - c[0] - 1

        trans_input = get_affine_transform(
            c, s, 0, [input_w, input_h])
        inp = cv2.warpAffine(img, trans_input,
                             (input_w, input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp.astype(np.float32) / 255.)
        if self.split == 'train' and not self.opt.no_color_aug:
            color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
        inp = (inp - self.mean) / self.std
        # Height-width-channel to channel-height-width.
        inp = inp.transpose(2, 0, 1)

        output_h = input_h // self.opt.down_ratio
        output_w = input_w // self.opt.down_ratio
        num_classes = self.num_classes
        trans_output = get_affine_transform(c, s, 0, [output_w, output_h])

        hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
        wh = np.zeros((self.max_objs, 2), dtype=np.float32)
        gtboxes = np.zeros((self.max_objs, 4), dtype=np.float32)
        dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
        reg = np.zeros((self.max_objs, 2), dtype=np.float32)
        ind = np.zeros((self.max_objs), dtype=np.int64)
        reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
        cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32)
        cat_spec_mask = np.zeros((self.max_objs, num_classes), dtype=np.uint8)
        instance_masks = np.zeros((self.max_objs, output_h, output_w), dtype=np.float32)
        draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
            draw_umich_gaussian

        gt_det = []

        for k in range(num_objs):
            ann = anns[k]
            instance_mask = self.coco.annToMask(ann)

            bbox = self._coco_box_to_bbox(ann['bbox'])
            cls_id = int(self.cat_ids[ann['category_id']])
            if flipped:
                # Mirror box and mask the same way the image was mirrored.
                bbox[[0, 2]] = width - bbox[[2, 0]] - 1
                instance_mask = instance_mask[:, ::-1]
            bbox[:2] = affine_transform(bbox[:2], trans_output)
            bbox[2:] = affine_transform(bbox[2:], trans_output)
            bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
            bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
            instance_mask = cv2.warpAffine(instance_mask, trans_output,
                                           (output_w, output_h),
                                           flags=cv2.INTER_LINEAR)
            instance_mask = instance_mask.astype(np.float32)

            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            # Boxes that collapse after clipping get no target at all.
            if h > 0 and w > 0:
                radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                radius = max(0, int(radius))
                radius = self.opt.hm_gauss if self.opt.mse_loss else radius
                ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
                ct_int = ct.astype(np.int32)
                draw_gaussian(hm[cls_id], ct_int, radius)
                gtboxes[k] = bbox
                wh[k] = 1. * w, 1. * h
                # Flattened index of the integer center in the output map.
                ind[k] = ct_int[1] * output_w + ct_int[0]
                # Sub-pixel offset lost by rounding the center down.
                reg[k] = ct - ct_int
                reg_mask[k] = 1
                cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
                cat_spec_mask[k, cls_id] = 1
                instance_masks[k] = instance_mask
                if self.opt.dense_wh:
                    draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
                gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
                               ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])

        ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
               "instance_mask": instance_masks, 'gtboxes': gtboxes,
               'cat_spec_mask': cat_spec_mask}
        if self.opt.dense_wh:
            hm_a = hm.max(axis=0, keepdims=True)
            dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
            ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
            del ret['wh']
        elif self.opt.cat_spec_wh:
            ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
            del ret['wh']
        if self.opt.reg_offset:
            ret.update({'reg': reg})
        if self.opt.debug > 0 or not self.split == 'train':
            gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
                np.zeros((1, 6), dtype=np.float32)
            meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
            ret['meta'] = meta
        return ret

0 comments on commit 95f7d36

Please sign in to comment.