Skip to content

Commit

Permalink
Show track_id for models with contrastive learning
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexeyAB committed Jul 7, 2020
1 parent 8c77a3d commit ccb392d
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 5 deletions.
6 changes: 5 additions & 1 deletion build/darknet/x64/darknet.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ class DETECTION(Structure):
("objectness", c_float),
("sort_class", c_int),
("uc", POINTER(c_float)),
("points", c_int)]
("points", c_int),
("embeddings", POINTER(c_float)),
("embedding_size", c_int),
("sim", c_float),
("track_id", c_int)]

class DETNUMPAIR(Structure):
_fields_ = [("num", c_int),
Expand Down
6 changes: 5 additions & 1 deletion darknet.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ class DETECTION(Structure):
("objectness", c_float),
("sort_class", c_int),
("uc", POINTER(c_float)),
("points", c_int)]
("points", c_int),
("embeddings", POINTER(c_float)),
("embedding_size", c_int),
("sim", c_float),
("track_id", c_int)]

class DETNUMPAIR(Structure):
_fields_ = [("num", c_int),
Expand Down
9 changes: 9 additions & 0 deletions include/darknet.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ struct layer {
int embedding_layer_id;
float *embedding_output;
int embedding_size;
float sim_thresh;
int track_history_size;
int coords;
int background;
int rescore;
Expand Down Expand Up @@ -867,6 +869,9 @@ typedef struct detection{
float *uc; // Gaussian_YOLOv3 - tx,ty,tw,th uncertainty
int points; // bit-0 - center, bit-1 - top-left-corner, bit-2 - bottom-right-corner
float *embeddings; // embeddings for tracking
int embedding_size;
float sim;
int track_id;
} detection;

// network.c -batch inference
Expand Down Expand Up @@ -1063,6 +1068,10 @@ void stop_timer_and_show();
void stop_timer_and_show_name(char *name);
void show_total_time();

// Assigns detection::track_id (and detection::sim) for the detections of the current call by
// comparing their embeddings, via cosine similarity, with detections remembered from earlier calls.
//   new_dets, new_dets_num - detections of the current call; modified in place
//   thresh                 - a detection takes part only if at least one class prob exceeds it
//   sim_thresh             - a pair is matched only if its similarity is strictly greater
//   deque_size             - number of previous calls whose detections are remembered
void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim_thresh, int deque_size);
// Gives every detection that still has track_id == 0 and at least one class prob above thresh
// a fresh id, counting up from new_track_id. Returns the next unused id.
int fill_remaining_id(detection *new_dets, int new_dets_num, int new_track_id, float thresh);


// gemm.h
LIB_API void init_cpu();

Expand Down
9 changes: 7 additions & 2 deletions src/demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,11 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int

int i;
for (i = 0; i < net.n; ++i) {
layer l = net.layers[i];
if (l.type == YOLO) l.mean_alpha = 1.0 / avg_frames;
layer lc = net.layers[i];
if (lc.type == YOLO) {
lc.mean_alpha = 1.0 / avg_frames;
l = lc;
}
}

if (l.classes != demo_classes) {
Expand Down Expand Up @@ -262,6 +265,8 @@ void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int
else diounms_sort(local_dets, local_nboxes, l.classes, nms, l.nms_kind, l.beta_nms);
}

if (l.embedding_size) set_track_id(local_dets, local_nboxes, demo_thresh, l.sim_thresh, l.track_history_size);

//printf("\033[2J");
//printf("\033[1;1H");
//printf("\nFPS:%.1f\n", fps);
Expand Down
126 changes: 126 additions & 0 deletions src/http_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,3 +755,129 @@ void stop_timer_and_show() {
// Fallback stub: ignores `name` and simply forwards to stop_timer_and_show().
void stop_timer_and_show_name(char *name) { stop_timer_and_show(); }
// Fallback stub: intentionally does nothing.
// NOTE(review): darknet.h declares show_total_time(), while this stub is named total_time() - confirm
// which name the non-C++11 build is expected to provide.
void total_time() {}
#endif // C++11

#include <algorithm>
#include <deque>
#include <iostream>
#include <utility>
#include <vector>
#include "blas.h"
#include "utils.h"

// One candidate pairing between a remembered detection and a current one.
struct similarity_detections_t {
    int old_id;   // index into the flattened list of remembered detections
    int new_id;   // index into the array of current detections
    float sim;    // similarity score computed for this pair
};

// Returns 1 when at least one of the detection's class probabilities is
// strictly greater than `thresh`, otherwise 0.
int check_prob(detection det, float thresh)
{
    int cls = 0;
    while (cls < det.classes) {
        if (det.prob[cls] > thresh) {
            return 1;
        }
        ++cls;
    }
    return 0;
}

// Hands out fresh track ids to detections that do not have one yet.
//
// A detection receives an id only if its track_id is still 0 and
// check_prob() accepts it for `thresh`. Ids are assigned in array order,
// starting at `new_track_id`; the return value is the next unused id.
int fill_remaining_id(detection *new_dets, int new_dets_num, int new_track_id, float thresh)
{
    int next_id = new_track_id;
    int idx = 0;
    while (idx < new_dets_num) {
        detection &det = new_dets[idx];
        ++idx;
        if (det.track_id != 0) continue;          // already tracked
        if (!check_prob(det, thresh)) continue;   // not confident enough to track
        det.track_id = next_id;
        ++next_id;
    }
    return next_id;
}

// Allocates a new array of `size` floats with xcalloc() and fills it with a
// copy of the first `size` floats of `src`. The caller owns the result.
float *make_float_array(float* src, size_t size)
{
    const size_t byte_count = size * sizeof(float);
    float *copy = static_cast<float*>(xcalloc(size, sizeof(float)));
    memcpy(copy, src, byte_count);
    return copy;
}

struct detection_t : detection {
detection_t(detection det) : detection(det)
{
if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size);
if (prob) prob = make_float_array(det.prob, classes);
if (uc) uc = make_float_array(det.uc, 4);
}

detection_t(detection_t const& det) : detection(det)
{
if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size);
if (prob) prob = make_float_array(det.prob, classes);
if (uc) uc = make_float_array(det.uc, 4);
}

~detection_t() {
if (embeddings) free(embeddings);
if (prob) free(prob);
if (uc) free(uc);
}
};



// Assigns track ids to the detections of the current call.
//
// Every current detection is compared, via cosine_similarity() of the
// embeddings, with every detection remembered from previous calls. Pairs are
// visited from most to least similar; a pair is accepted when the current
// detection passes check_prob(thresh), the similarity is strictly greater
// than `sim_thresh`, and neither detection nor the remembered track id has
// been used by an earlier pair in this call. Accepted pairs copy the
// remembered track id and the similarity into the current detection.
// Detections left with track_id == 0 receive fresh ids from
// fill_remaining_id(). Finally the detections passing check_prob(thresh) are
// deep-copied into the history, which keeps at most `deque_size` calls.
//
//   new_dets, new_dets_num - detections of the current call, modified in place;
//                            a null pointer or non-positive count is treated
//                            as "no detections" (the history still advances)
//   thresh                 - class probability threshold used by check_prob()
//   sim_thresh             - minimum (exclusive) similarity for a match
//   deque_size             - number of previous calls to remember; values
//                            <= 0 keep no history
//
// The id counter and the history live in function-local statics, so state is
// shared by all callers and persists between calls; no synchronization is
// done here.
void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim_thresh, int deque_size)
{
    static int new_track_id = 1;
    static std::deque<std::vector<detection_t>> old_dets_dq;

    const size_t new_count = (new_dets && new_dets_num > 0) ? static_cast<size_t>(new_dets_num) : 0;

    // Flatten the history into one list. Only pointers are collected: the
    // history is not modified until the very end of this function, so the
    // deep copies the by-value version made on every call are unnecessary.
    std::vector<const detection_t*> old_dets;
    for (const std::vector<detection_t> &frame : old_dets_dq) {
        for (const detection_t &det : frame) {
            old_dets.push_back(&det);
        }
    }
    const size_t old_count = old_dets.size();

    std::vector<similarity_detections_t> sim_det(old_count * new_count);

    // calculate similarity
    for (size_t old_id = 0; old_id < old_count; ++old_id) {
        for (size_t new_id = 0; new_id < new_count; ++new_id) {
            similarity_detections_t &entry = sim_det[old_id * new_count + new_id];
            // Compare only as many values as both buffers are declared to hold.
            const int size = std::min(new_dets[new_id].embedding_size, old_dets[old_id]->embedding_size);
            entry.new_id = static_cast<int>(new_id);
            entry.old_id = static_cast<int>(old_id);
            entry.sim = cosine_similarity(new_dets[new_id].embeddings, old_dets[old_id]->embeddings, size);
        }
    }

    // sort similarity, best first
    std::sort(sim_det.begin(), sim_det.end(),
        [](const similarity_detections_t &a, const similarity_detections_t &b) { return a.sim > b.sim; });
    if (!sim_det.empty()) printf(" sim_det_first = %f, sim_det_end = %f \n", sim_det.front().sim, sim_det.back().sim);

    // 1 = still available for matching, 0 = already consumed
    std::vector<int> new_idx(new_count, 1);
    std::vector<int> old_idx(old_count, 1);
    std::vector<int> track_idx(new_track_id > 0 ? static_cast<size_t>(new_track_id) : 0, 1);

    // match objects
    for (const similarity_detections_t &cand : sim_det) {
        const int new_id = cand.new_id;
        const int old_id = cand.old_id;
        const int track_id = old_dets[old_id]->track_id;
        // Defensive: a remembered id outside [0, new_track_id) can only come
        // from a detection that arrived with a stale track_id; never index
        // track_idx with it.
        if (track_id < 0 || track_id >= new_track_id) continue;
        if (!track_idx[track_id] || !new_idx[new_id] || !old_idx[old_id]) continue;
        if (!(sim_thresh < cand.sim)) continue;
        if (!check_prob(new_dets[new_id], thresh)) continue;

        new_dets[new_id].sim = cand.sim;
        new_dets[new_id].track_id = track_id;
        new_idx[new_id] = 0;
        old_idx[old_id] = 0;
        track_idx[track_id] = 0;
    }

    // set new track_id
    new_track_id = fill_remaining_id(new_dets, static_cast<int>(new_count), new_track_id, thresh);

    // store new detections to the queue of vectors
    std::vector<detection_t> new_det_vec;
    new_det_vec.reserve(new_count);
    for (size_t i = 0; i < new_count; ++i) {
        if (check_prob(new_dets[i], thresh)) {
            new_det_vec.emplace_back(new_dets[i]);
        }
    }

    old_dets_dq.push_back(std::move(new_det_vec));  // add new

    // remove old; a negative deque_size must not turn into a huge unsigned limit
    const size_t max_frames = deque_size > 0 ? static_cast<size_t>(deque_size) : 0;
    while (old_dets_dq.size() > max_frames) {
        old_dets_dq.pop_front();
    }
}

8 changes: 7 additions & 1 deletion src/image_opencv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -893,10 +893,15 @@ extern "C" void draw_detections_cv_v3(mat_cv* mat, detection *dets, int num, flo
if (class_id < 0) {
strcat(labelstr, names[j]);
class_id = j;
char buff[10];
char buff[20];
if (dets[i].track_id) {
sprintf(buff, " (track = %d)", dets[i].track_id);
strcat(labelstr, buff);
}
sprintf(buff, " (%2.0f%%)", dets[i].prob[j] * 100);
strcat(labelstr, buff);
printf("%s: %.0f%% ", names[j], dets[i].prob[j] * 100);
if (dets[i].track_id) printf("(track = %d, sim = %f) ", dets[i].track_id, dets[i].sim);
}
else {
strcat(labelstr, ", ");
Expand Down Expand Up @@ -1540,4 +1545,5 @@ extern "C" void show_opencv_info()
// No-op stand-ins with C linkage: each ignores its arguments and either returns 0 or does nothing.
// NOTE(review): these appear to sit in the branch compiled when OPENCV is not defined
// (closed by the #endif below) - confirm against the full file.
extern "C" int wait_key_cv(int delay) { return 0; }
extern "C" int wait_until_press_key_cv() { return 0; }
extern "C" void destroy_all_windows_cv() {}
extern "C" void resize_window_cv(char const* window_name, int width, int height) {}
#endif // OPENCV
1 change: 1 addition & 0 deletions src/image_opencv.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ void show_opencv_info();
int wait_key_cv(int delay);
int wait_until_press_key_cv();
void destroy_all_windows_cv();
void resize_window_cv(char const* window_name, int width, int height);

#endif // OPENCV

Expand Down
2 changes: 2 additions & 0 deletions src/network.c
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,7 @@ detection *make_network_boxes(network *net, float thresh, int *num)

if(l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
else dets[i].embeddings = NULL;
dets[i].embedding_size = l.embedding_size;
}
return dets;
}
Expand Down Expand Up @@ -823,6 +824,7 @@ detection *make_network_boxes_batch(network *net, float thresh, int *num, int ba

if (l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float));
else dets[i].embeddings = NULL;
dets[i].embedding_size = l.embedding_size;
}
return dets;
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ layer parse_yolo(list *options, size_params params)
l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
l.random = option_find_float_quiet(options, "random", 0);

l.track_history_size = option_find_int_quiet(options, "track_history_size", 5);
l.sim_thresh = option_find_int_quiet(options, "sim_thresh", 0.8);
int embedding_layer_id = option_find_int_quiet(options, "embedding_layer", 999999);
if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id;
if (embedding_layer_id != 999999) {
Expand Down

0 comments on commit ccb392d

Please sign in to comment.