import math

import numpy as np
from PIL import Image
from tensorflow import keras

from configs import *
from utils.utils import *


def read_lines(annotation_path):
    with open(annotation_path) as f:
        annot_lines = f.readlines()
    return annot_lines


def load_img_bboxes_pairs(annotation_path):
    """
    Load annotations.
    Customize this function as per your dataset.
    :return:
        list of pairs of image path and corresponding bounding boxes, where each
        box holds absolute pixel coordinates plus a class id:
        [['.../00_Datasets/PASCAL_VOC/images/000007.jpg', [[141, 50, 500, 330, 6],
                                                           [26, 180, 115, 400, 4]]],
         ['.../00_Datasets/PASCAL_VOC/images/000008.jpg', [[39, 65, 371, 480, 3]]]]
    """
    lines = read_lines(annotation_path)

    img_bboxes_pairs = [[line.split()[0], np.array([list(map(int, box.split(','))) for box in line.split()[1:]])]
                        for line in lines]

    return img_bboxes_pairs
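
# The parser above expects one image per annotation line: an image path followed
# by any number of space-separated boxes, each 'x_min,y_min,x_max,y_max,class_id'.
# A hypothetical line:
#   .../00_Datasets/PASCAL_VOC/images/000007.jpg 141,50,500,330,6 26,180,115,400,4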


class YoloDataGenerator(keras.utils.Sequence):
    def __init__(self, mode):
        self.img_bboxes_pairs = load_img_bboxes_pairs({'train': TRAIN_ANNOT_PATH, 'val': VAL_ANNOT_PATH}[mode])
        self.data_aug = {'train': TRAIN_DATA_AUG, 'val': VAL_DATA_AUG}[mode]
        self.batch_size = {'train': TRAIN_BATCH_SIZE, 'val': VAL_BATCH_SIZE}[mode]
        # shuffle between epochs only in training mode (assumed; on_epoch_end reads this flag)
        self.shuffle = mode == 'train'
        self.input_shape = IMAGE_SIZE
        self.anchors_mask = YOLO_ANCHORS_MASK
        self.num_scales = len(self.anchors_mask)
        self.num_images = len(self.img_bboxes_pairs)
        self.classes, self.num_classes = get_classes(PATH_CLASSES)
        self.anchors, self.num_anchors = get_anchors(PATH_ANCHORS)

    def __len__(self):
        """
        Denotes the number of batches per epoch
        """
        return math.ceil(self.num_images / float(self.batch_size))

    def __getitem__(self, index):
        """
        Generate one batch of data. When the batch corresponding to a given
        index is requested, Keras calls this method to generate it.
        """
        # Generate indexes for a batch, clipped so the last (short) batch does
        # not run past the end of the dataset
        batch_indexes = range(index * self.batch_size, min((index + 1) * self.batch_size, self.num_images))

        # Generate data
        image_data, y_true = self.__data_generation(batch_indexes)

        # dummy targets: the actual loss is computed inside the model graph
        return [image_data, *y_true], np.zeros(len(batch_indexes))

    def on_epoch_end(self):
        """
        Shuffle the dataset at the end of each epoch
        (keras.utils.Sequence invokes on_epoch_end, not on_epoch_begin)
        """
        if self.shuffle:
            np.random.shuffle(self.img_bboxes_pairs)

    def process_data(self, img_bboxes_pair, data_aug=False, max_boxes=20, proc_img=True):
        """
        Load one image and its boxes, letterboxed to the model input shape;
        random real-time data augmentation is left as a TODO
        """
        image = Image.open(img_bboxes_pair[0])
        iw, ih = image.size
        h, w = self.input_shape
        box = img_bboxes_pair[1]

        # letterbox: scale to fit the input shape, then pad the borders evenly
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        dx = (w - nw) // 2
        dy = (h - nh) // 2

        if data_aug:
            # TODO: real-time augmentation (e.g. flips, jitter) is not implemented yet
            pass

        image_data = Image.new('RGB', (w, h), (128, 128, 128))
        if proc_img:
            image = image.resize((nw, nh), Image.BICUBIC)
            image_data.paste(image, (dx, dy))
            image_data = np.array(image_data) / 255.

        # correct boxes: map absolute pixel coordinates into the letterboxed image
        box_data = np.zeros((max_boxes, 5))
        if box.shape[0] > 0:
            np.random.shuffle(box)
            if box.shape[0] > max_boxes:
                box = box[:max_boxes]
            box[:, [0, 2]] = box[:, [0, 2]] * scale + dx
            box[:, [1, 3]] = box[:, [1, 3]] * scale + dy
            box_data[:box.shape[0]] = box

        return image_data, box_data
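
    # A single-call sketch (hypothetical file): process_data(['000007.jpg',
    # np.array([[141, 50, 500, 330, 6]])]) returns an (h, w, 3) float image scaled
    # to [0, 1] and a (max_boxes, 5) array of letterbox-corrected pixel boxes,
    # zero-padded past the real detections.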

    def preprocess_true_boxes(self, true_boxes):
        """
        Preprocess true boxes to training input format
        :param
            true_boxes: array, shape=(m, T, 5)
                Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
        :returns:
            y_true: list of arrays, shaped like yolo_outputs; xywh are relative values
        """
        assert (true_boxes[..., 4] < self.num_classes).all(), 'class id must be less than num_classes'
        anchor_mask = self.anchors_mask
        input_shape = np.array(self.input_shape, dtype='int32')
        # images actually present in this batch (the final batch may be short)
        batch_len = true_boxes.shape[0]

        # (x_min, y_min, x_max, y_max) is converted to (x_center, y_center, width, height) relative to input shape
        boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
        boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
        true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
        true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]

        # grid shapes for the 3 scales (strides 32, 16 and 8)
        grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[s] for s in range(self.num_scales)]

        # initialise y_true:
        # [num_scales][batch_len x grid_shape_0 x grid_shape_1 x num_anchors_per_scale x (5 + num_classes)]
        y_true = [np.zeros((batch_len, grid_shapes[s][0], grid_shapes[s][1], len(anchor_mask[s]),
                            5 + self.num_classes), dtype='float32') for s in range(self.num_scales)]

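        # Note: anchors and ground-truth boxes below are both centred at the origin,
        # so the IoU used to pick the best anchor for each box compares shapes
        # (w, h) only; this is the standard YOLO anchor-assignment trick.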
        # Expand dims to apply broadcasting
        anchors = np.expand_dims(self.anchors, 0)
        anchor_maxes = anchors / 2.
        anchor_mins = -anchor_maxes
        # find anchor area
        anchor_area = anchors[..., 0] * anchors[..., 1]

        # number of real (non-padding) boxes per image: count the non-zero w and h
        # entries and halve, since each real box contributes two non-zero values
        num_nz_boxes = (np.count_nonzero(boxes_wh, axis=1).sum(axis=1) / 2).astype('int32')

        for b_idx in range(batch_len):
            # Discard zero (padding) rows
            box_wh = boxes_wh[b_idx, 0:num_nz_boxes[b_idx]]
            if box_wh.shape[0] == 0:
                continue

            # Expand dim to apply broadcasting
            box_wh = np.expand_dims(box_wh, -2)
            box_maxes = box_wh / 2.
            box_mins = -box_maxes
            # find box area
            box_area = box_wh[..., 0] * box_wh[..., 1]

            # find intersection area
            intersect_mins = np.maximum(box_mins, anchor_mins)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
            intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]

            # find iou
            iou_anchors = intersect_area / (box_area + anchor_area - intersect_area)

            # Find best anchor for each true box
            best_anchor_indices = np.argmax(iou_anchors, axis=-1)

            # y_true shape:
            # [num_scales][batch_len x grid_shape_0 x grid_shape_1 x num_anchors_per_scale x (5 + num_classes)]
            for box_no, anchor_idx in enumerate(best_anchor_indices):
                # map the flat anchor index onto (scale, anchor-within-scale),
                # e.g. anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
                scale_idx, scale, anchor_on_scale = [(x, tuple(grid_shapes[x]), anchor_mask[x].index(anchor_idx))
                                                     for x in range(len(anchor_mask))
                                                     if anchor_idx in anchor_mask[x]][0]

                # dimensions of a single box (already normalised to the input shape)
                x, y, width, height, class_label = true_boxes[b_idx, box_no, 0:5]

                # index of the grid cell holding the center of the bbox
                i = np.floor(y * scale[0]).astype('int32')
                j = np.floor(x * scale[1]).astype('int32')

                # fill y_true: box coordinates, objectness and one-hot class
                y_true[scale_idx][b_idx, i, j, anchor_on_scale, 0:4] = np.array([x, y, width, height])
                y_true[scale_idx][b_idx, i, j, anchor_on_scale, 4] = 1
                y_true[scale_idx][b_idx, i, j, anchor_on_scale, 5 + int(class_label)] = 1

        return y_true
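
    # Shape sketch, assuming a 416 x 416 input and 20 classes: y_true is a list of
    # three arrays shaped (m, 13, 13, 3, 25), (m, 26, 26, 3, 25) and (m, 52, 52, 3, 25),
    # one per detection scale, with 3 anchors per scale and 5 + num_classes channels.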

    def __data_generation(self, batch_indexes):
        """
        Generates data containing batch_size samples
        """
        image_data = []
        box_data = []
        for i in batch_indexes:
            image, box = self.process_data(self.img_bboxes_pairs[i], self.data_aug)
            image_data.append(image)
            box_data.append(box)

        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = self.preprocess_true_boxes(box_data)

        return image_data, y_true



if __name__ == "__main__":

    ydg = YoloDataGenerator('train')
    num_batches = len(ydg)
    X0, y0 = ydg[0]
    X1, y1 = ydg[1]
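    # Quick shape sanity check (a sketch; exact shapes depend on IMAGE_SIZE,
    # the anchor mask and the batch size defined in configs):
    print('batches per epoch:', num_batches)
    print('image batch shape:', X0[0].shape)
    print('y_true shapes:', [t.shape for t in X0[1:]])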