From 9ff8653d999c8a22bc8f1ff4f4a8a3cc5b63d255 Mon Sep 17 00:00:00 2001
From: Sarim Mehdi
Date: Wed, 25 Aug 2021 17:10:21 +0200
Subject: [PATCH] Made inference faster (this is especially useful when using Yolo9000) (#8009)

* Update network.c

custom_get_region_detections function now keeps track of class index with the highest probability.

* Update darknet.h

Added best_class_idx to detection struct

* Update darknet.py

added python code for faster negative removal and also faster non-max suppression
---
 darknet.py        | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 include/darknet.h |  1 +
 src/network.c     |  6 +++++
 3 files changed, 73 insertions(+)

diff --git a/darknet.py b/darknet.py
index 698f0469ad5..ebb0eede210 100644
--- a/darknet.py
+++ b/darknet.py
@@ -24,6 +24,7 @@ class BOX(Structure):
 class DETECTION(Structure):
     _fields_ = [("bbox", BOX),
                 ("classes", c_int),
+                ("best_class_idx", c_int),
                 ("prob", POINTER(c_float)),
                 ("mask", POINTER(c_float)),
                 ("objectness", c_float),
@@ -133,6 +134,56 @@ def decode_detection(detections):
         decoded.append((str(label), confidence, bbox))
     return decoded
 
+# https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
+# Malisiewicz et al.
+def non_max_suppression_fast(detections, overlap_thresh):
+    """
+    Greedy NMS over detection tuples whose last item is (cx, cy, w, h).
+
+    Boxes are visited largest bottom edge (y2) first, not by confidence;
+    returns the kept detections, or [] when detections is empty.
+    """
+    centers = np.array([d[-1] for d in detections], dtype=float).reshape(-1, 4)
+    half = centers[:, 2:] / 2  # reshape above keeps shape (0, 4) when empty
+    boxes_array = np.hstack((centers[:, :2] - half, centers[:, :2] + half))
+
+    # initialize the list of picked indexes
+    pick = []
+    # grab the corner coordinates of the bounding boxes
+    x1 = boxes_array[:, 0]
+    y1 = boxes_array[:, 1]
+    x2 = boxes_array[:, 2]
+    y2 = boxes_array[:, 3]
+    # compute the area of the bounding boxes and sort the bounding
+    # boxes by the bottom-right y-coordinate of the bounding box
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+    idxs = np.argsort(y2)
+    # keep looping while some indexes still remain in the indexes
+    # list
+    while len(idxs) > 0:
+        # grab the last index in the indexes list and add the
+        # index value to the list of picked indexes
+        last = len(idxs) - 1
+        i = idxs[last]
+        pick.append(i)
+        # find the largest (x, y) coordinates for the start of
+        # the bounding box and the smallest (x, y) coordinates
+        # for the end of the bounding box
+        xx1 = np.maximum(x1[i], x1[idxs[:last]])
+        yy1 = np.maximum(y1[i], y1[idxs[:last]])
+        xx2 = np.minimum(x2[i], x2[idxs[:last]])
+        yy2 = np.minimum(y2[i], y2[idxs[:last]])
+        # compute the width and height of the intersection
+        w = np.maximum(0, xx2 - xx1 + 1)
+        h = np.maximum(0, yy2 - yy1 + 1)
+        # overlap = intersection area / area of the compared box
+        overlap = (w * h) / area[idxs[:last]]
+        # delete the picked index and all indexes over the overlap threshold
+        idxs = np.delete(idxs, np.concatenate(([last],
+                                               np.where(overlap > overlap_thresh)[0])))
+    # return the detections whose boxes were picked, in the order
+    # they were picked
+    return [detections[i] for i in pick]
 
 def remove_negatives(detections, class_names, num):
     """
@@ -148,6 +199,21 @@ def remove_negatives(detections, class_names, num):
     return predictions
 
 
+def remove_negatives_faster(detections, class_names, num):
+    """
+    Faster version of remove_negatives (very useful when using yolo9000)
+    """
+    predictions = []
+    for j in range(num):
+        det = detections[j]
+        if det.best_class_idx == -1:
+            continue
+        name = class_names[det.best_class_idx]
+        bbox = (det.bbox.x, det.bbox.y, det.bbox.w, det.bbox.h)
+        predictions.append((name, det.prob[det.best_class_idx], bbox))
+    return predictions
+
+
 def detect_image(network, class_names, image, thresh=.5, hier_thresh=.5, nms=.45):
     """
         Returns a list with highest confidence class and their bbox
diff --git a/include/darknet.h b/include/darknet.h
index 5f225ab2763..d72027cc45f 100644
--- a/include/darknet.h
+++ b/include/darknet.h
@@ -899,6 +899,7 @@ typedef struct ious {
 typedef struct detection{
     box bbox;
     int classes;
+    int best_class_idx;
     float *prob;
     float *mask;
     float objectness;
diff --git a/src/network.c b/src/network.c
index f45c7902c2c..92379f053f4 100644
--- a/src/network.c
+++ b/src/network.c
@@ -885,7 +885,13 @@ void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, f
         dets[j].classes = l.classes;
         dets[j].bbox = boxes[j];
         dets[j].objectness = 1;
+        float highest_prob = 0;
+        dets[j].best_class_idx = -1;
         for (i = 0; i < l.classes; ++i) {
+            if (probs[j][i] > highest_prob) {
+                highest_prob = probs[j][i];
+                dets[j].best_class_idx = i;
+            }
             dets[j].prob[i] = probs[j][i];
         }
     }