From 6aac74413c674f6f803cd060e2bd0013f78ad9a5 Mon Sep 17 00:00:00 2001
From: tangshixiang
Date: Thu, 10 May 2018 00:36:06 +0800
Subject: [PATCH] delete utils because its files have been merged to ops

---
 utils/__init__.py          |   0
 utils/anet_db.py           | 222 --------------------------------------
 utils/detection_metrics.py |  84 --------------
 utils/metrics.py           |  60 ---------
 utils/sequence_funcs.py    | 136 ---------------------
 utils/thumos_db.py         | 266 -----------------------------------------
 utils/video_funcs.py       |  82 -------------
 7 files changed, 850 deletions(-)
 delete mode 100644 utils/__init__.py
 delete mode 100644 utils/anet_db.py
 delete mode 100644 utils/detection_metrics.py
 delete mode 100644 utils/metrics.py
 delete mode 100644 utils/sequence_funcs.py
 delete mode 100644 utils/thumos_db.py
 delete mode 100644 utils/video_funcs.py

diff --git a/utils/__init__.py b/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/utils/anet_db.py b/utils/anet_db.py
deleted file mode 100644
index fc18644..0000000
--- a/utils/anet_db.py
+++ /dev/null
@@ -1,222 +0,0 @@
-#from .utils import *
-from collections import OrderedDict
-
-
-class Instance(object):
-    """
-    Representing an instance of activity in the videos
-    """
-
-    def __init__(self, idx, anno, vid_id, vid_info, name_num_mapping):
-        self._starting, self._ending = anno['segment'][0], anno['segment'][1]
-        self._str_label = anno['label']
-        self._total_duration = vid_info['duration']
-        self._idx = idx
-        self._vid_id = vid_id
-        self._file_path = None
-
-        if name_num_mapping:
-            self._num_label = name_num_mapping[self._str_label]
-
-    @property
-    def time_span(self):
-        return self._starting, self._ending
-
-    @property
-    def covering_ratio(self):
-        return self._starting / float(self._total_duration), self._ending / float(self._total_duration)
-
-    @property
-    def num_label(self):
-        return self._num_label
-
-    @property
-    def label(self):
-        return self._str_label
-
-    @property
-    def name(self):
-        return '{}_{}'.format(self._vid_id, self._idx)
-
-    @property
-    def path(self):
-        if self._file_path is None:
-            raise ValueError("This instance is not associated to a file on disk. Maybe the file is missing?")
-        return self._file_path
-
-    @path.setter
-    def path(self, path):
-        self._file_path = path
-
-
-class Video(object):
-    """
-    This class represents one video in the activity-net db
-    """
-    def __init__(self, key, info, name_idx_mapping=None):
-        self._id = key
-        self._info_dict = info
-        self._instances = [Instance(i, x, self._id, self._info_dict, name_idx_mapping)
-                           for i, x in enumerate(self._info_dict['annotations'])]
-        self._file_path = None
-
-    @property
-    def id(self):
-        return self._id
-
-    @property
-    def url(self):
-        return self._info_dict['url']
-
-    @property
-    def instances(self):
-        return self._instances
-
-    @property
-    def duration(self):
-        return self._info_dict['duration']
-
-    @property
-    def subset(self):
-        return self._info_dict['subset']
-
-    @property
-    def instance(self):
-        return self._instances
-
-    @property
-    def path(self):
-        if self._file_path is None:
-            raise ValueError("This video is not associated to a file on disk. Maybe the file is missing?")
-        return self._file_path
-
-    @path.setter
-    def path(self, path):
-        self._file_path = path
-
-
-class ANetDB(object):
-    """
-    This class is the abstraction of the activity-net db
-    """
-
-    _CONSTRUCTOR_LOCK = object()
-
-    def __init__(self, token):
-        """
-        Disabled constructor
-        :param token:
-        :return:
-        """
-        if token is not self._CONSTRUCTOR_LOCK:
-            raise ValueError("Use get_db to construct an instance, do not directly use the constructor")
-
-    @classmethod
-    def get_db(cls, version="1.2"):
-        """
-        Build the internal representation of the ActivityNet databases.
-        We use alphabetical order to map each label string to its numerical index for learning.
-        :param version:
-        :return:
-        """
-        if version not in ['1.2', '1.3']:
-            raise ValueError("Unsupported database version {}".format(version))
-
-        import os
-        raw_db_file = 'data/activity_net.v{}.min.json'.format('-'.join(version.split('.')))
-
-        import json
-        db_data = json.load(open(raw_db_file))
-
-        me = cls(cls._CONSTRUCTOR_LOCK)
-        me.version = version
-        me.prepare_data(db_data)
-
-        return me
-
-    def prepare_data(self, raw_db):
-        self._version = raw_db['version']
-
-        # deal with taxonomy
-        self._taxonomy = raw_db['taxonomy']
-        self._parse_taxonomy()
-
-        self._database = raw_db['database']
-        self._video_dict = {k: Video(k, v, self._name_idx_table) for k, v in self._database.items()}
-
-        # split testing/training/validation set
-        self._testing_dict = OrderedDict(sorted([(k, v) for k, v in self._video_dict.items() if v.subset == 'testing'], key=lambda x: x[0]))
-        self._training_dict = OrderedDict(sorted([(k, v) for k, v in self._video_dict.items() if v.subset == 'training'], key=lambda x: x[0]))
-        self._validation_dict = OrderedDict(sorted([(k, v) for k, v in self._video_dict.items() if v.subset == 'validation'], key=lambda x: x[0]))
-
-        self._training_inst_dict = {i.name: i for v in self._training_dict.values() for i in v.instances}
-        self._validation_inst_dict = {i.name: i for v in self._validation_dict.values() for i in v.instances}
-
-        print("There are {} videos for training, {} for validation, {} for testing".format(
-            len(self._training_dict), len(self._validation_dict), len(self._testing_dict)
-        ))
-        print("There are {} instances for training, {} for validation".format(
-            len(self._training_inst_dict), len(self._validation_inst_dict)
-        ))
-
-    def get_subset_videos(self, subset_name):
-        if subset_name == 'training':
-            return self._training_dict.values()
-        elif subset_name == 'validation':
-            return self._validation_dict.values()
-        elif subset_name == 'testing':
-            return self._testing_dict.values()
-        else:
-            raise ValueError("Unknown subset {}".format(subset_name))
-
-    def get_subset_instance(self, subset_name):
-        if subset_name == 'training':
-            return self._training_inst_dict.values()
-        elif subset_name == 'validation':
-            return self._validation_inst_dict.values()
-        else:
-            raise ValueError("Unknown subset {}".format(subset_name))
-
-    def get_ordered_label_list(self):
-        return [self._idx_name_table[x] for x in sorted(self._idx_name_table.keys())]
-
-    def _parse_taxonomy(self):
-        """
-        This function just parses the taxonomy file.
-        It gives alphabetically ordered indices to the classes in the competition.
-        :return:
-        """
-        name_dict = {x['nodeName']: x for x in self._taxonomy}
-        parents = set()
-        for x in self._taxonomy:
-            parents.add(x['parentName'])
-
-        # leaf nodes are those without any child
-        leaf_nodes = [name_dict[x] for x
-                      in list(set(name_dict.keys()).difference(parents))]
-        sorted_lead_nodes = sorted(leaf_nodes, key=lambda l: l['nodeName'])
-        self._idx_name_table = {i: e['nodeName'] for i, e in enumerate(sorted_lead_nodes)}
-        self._name_idx_table = {e['nodeName']: i for i, e in enumerate(sorted_lead_nodes)}
-        self._name_table = {x['nodeName']: x for x in sorted_lead_nodes}
-        print("Got {} leaf classes out of {}".format(len(self._name_table), len(name_dict)))
-
-    def try_load_file_path(self, frame_path):
-        """
-        Simple version of path finding
-        :return:
-        """
-        import glob
-        import os
-        folders = glob.glob(os.path.join(frame_path, '*'))
-        ids = [os.path.splitext(name)[0][-11:] for name in folders]
-
-        folder_dict = dict(zip(ids, folders))
-
-        cnt = 0
-        for k in self._video_dict.keys():
-            if k in folder_dict:
-                self._video_dict[k].path = folder_dict[k]
-                cnt += 1
-        print("loaded {} video folders".format(cnt))
diff --git a/utils/detection_metrics.py b/utils/detection_metrics.py
deleted file mode 100644
index eb1fa2b..0000000
--- a/utils/detection_metrics.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""
-This module provides some utils for calculating metrics in temporal action detection
-"""
-import numpy as np
-
-
-def temporal_iou(span_A, span_B):
-    """
-    Calculates the intersection over union of two temporal "bounding boxes"
-
-    span_A: (start, end)
-    span_B: (start, end)
-    """
-    union = min(span_A[0], span_B[0]), max(span_A[1], span_B[1])
-    inter = max(span_A[0], span_B[0]), min(span_A[1], span_B[1])
-
-    if inter[0] >= inter[1]:
-        return 0
-    else:
-        return float(inter[1] - inter[0]) / float(union[1] - union[0])
-
-
-def overlap_over_b(span_A, span_B):
-    inter = max(span_A[0], span_B[0]), min(span_A[1], span_B[1])
-    if inter[0] >= inter[1]:
-        return 0
-    else:
-        return float(inter[1] - inter[0]) / float(span_B[1] - span_B[0])
-
-
-def temporal_recall(gt_spans, est_spans, thresh=0.5):
-    """
-    Calculate the temporal recall of ground-truth spans against estimated spans
-
-    Parameters
-    ----------
-    gt_spans: [(start, end), ...]
-    est_spans: [(start, end), ...]
-
-    Returns
-    -------
-    recall_info: (hit, total)
-    """
-    hit_slot = [False] * len(gt_spans)
-    for i, gs in enumerate(gt_spans):
-        for es in est_spans:
-            if temporal_iou(gs, es) > thresh:
-                hit_slot[i] = True
-                break
-    recall_info = (np.sum(hit_slot), len(hit_slot))
-    return recall_info
-
-
-def name_proposal(gt_spans, est_spans, thresh=0.0):
-    """
-    Assign labels to positive proposals
-    :param gt_spans: [(label, (start, end)), ...]
-    :param est_spans: [(start, end), ...]
-    :param thresh:
-    :return: [(label, overlap, overlap_over_self, start, end), ...], one per est_span
-    """
-    ret = []
-    for es in est_spans:
-        max_overlap = 0
-        max_overlap_over_self = 0
-        label = 0
-        for gs in gt_spans:
-            ov = temporal_iou(gs[1], es)
-            ov_pr = overlap_over_b(gs[1], es)
-            if ov > thresh and ov > max_overlap:
-                label = gs[0] + 1
-                max_overlap = ov
-                max_overlap_over_self = ov_pr
-        ret.append((label, max_overlap, max_overlap_over_self, es[0], es[1]))
-
-    return ret
-
-
-def get_temporal_proposal_recall(pr_list, gt_list, thresh):
-    recall_info_list = [temporal_recall(x, y, thresh=thresh) for x, y in zip(gt_list, pr_list)]
-    per_video_recall = np.sum([x[0] == x[1] for x in recall_info_list]) / float(len(recall_info_list))
-    per_inst_recall = np.sum([x[0] for x in recall_info_list]) / float(np.sum([x[1] for x in recall_info_list]))
-    return per_video_recall, per_inst_recall
diff --git a/utils/metrics.py b/utils/metrics.py
deleted file mode 100644
index ab15872..0000000
--- a/utils/metrics.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""
-This module provides some utils for calculating metrics
-"""
-import numpy as np
-from sklearn.metrics import average_precision_score, confusion_matrix
-
-
-def softmax(raw_score, T=1):
-    exp_s = np.exp((raw_score - raw_score.max(axis=-1)[..., None])*T)
-    sum_s = exp_s.sum(axis=-1)
-    return exp_s / sum_s[..., None]
-
-
-def top_k_acc(lb_set, scores, k=3):
-    idx = np.argsort(scores)[-k:]
-    return len(lb_set.intersection(idx)), len(lb_set)
-
-
-def top_k_hit(lb_set, scores, k=3):
-    idx = np.argsort(scores)[-k:]
-    return len(lb_set.intersection(idx)) > 0, 1
-
-
-def top_3_accuracy(score_dict, video_list):
-    return top_k_accuracy(score_dict, video_list, 3)
-
-
-def top_k_accuracy(score_dict, video_list, k):
-    video_labels = [set([i.num_label for i in v.instances]) for v in video_list]
-
-    video_top_k_acc = np.array(
-        [top_k_hit(lb, score_dict[v.id], k=k) for v, lb in zip(video_list, video_labels)
-         if v.id in score_dict])
-
-    tmp = video_top_k_acc.sum(axis=0).astype(float)
-    top_k_acc = tmp[0] / tmp[1]
-
-    return top_k_acc
-
-
-def video_mean_ap(score_dict, video_list):
-    avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if
-                          v.id in score_dict]
-    pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict])
-    gt_array = np.zeros(pred_array.shape)
-
-    for i in xrange(pred_array.shape[0]):
-        gt_array[i, list(avail_video_labels[i])] = 1
-    mean_ap = average_precision_score(gt_array, pred_array, average='macro')
-    return mean_ap
-
-
-def mean_class_accuracy(scores, labels):
-    pred = np.argmax(scores, axis=1)
-    cf = confusion_matrix(labels, pred).astype(float)
-
-    cls_cnt = cf.sum(axis=1)
-    cls_hit = np.diag(cf)
-
-    return np.mean(cls_hit/cls_cnt)
diff --git a/utils/sequence_funcs.py b/utils/sequence_funcs.py
deleted file mode 100644
index e31e304..0000000
--- a/utils/sequence_funcs.py
+++ /dev/null
@@ -1,136 +0,0 @@
-from .metrics import softmax
-
-import sys
-import numpy as np
-from scipy.ndimage import gaussian_filter
-try:
-    from nms.nms_wrapper import nms
-except ImportError:
-    nms = None
-
-
-def label_frame_by_threshold(score_mat, cls_lst, bw=None, thresh=list([0.05]), multicrop=True):
-    """
-    Build frame labels by thresholding the foreground class responses
-    :param score_mat:
-    :param cls_lst:
-    :param bw:
-    :param thresh:
-    :param multicrop:
-    :return:
-    """
-    if multicrop:
-        f_score = score_mat.mean(axis=1)
-    else:
-        f_score = score_mat
-
-    ss = softmax(f_score)
-
-    rst = []
-    for cls in cls_lst:
-        cls_score = ss[:, cls+1] if bw is None else gaussian_filter(ss[:, cls+1], bw)
-        for th in thresh:
-            rst.append((cls, cls_score > th, f_score[:, cls+1]))
-
-    return rst
-
-
-def gen_exponential_sw_proposal(video_info, time_step=1, max_level=8, overlap=0.4):
-    spans = [2 ** x for x in range(max_level)]
-    duration = video_info.duration
-    pr = []
-    for t_span in spans:
-        span = t_span * time_step
-        step = int(np.ceil(span * (1 - overlap)))
-        local_boxes = [(i, i + t_span) for i in np.arange(0, duration, step)]
-        pr.extend(local_boxes)
-
-    # filter proposals
-    # a valid proposal should have at least one second in the video
-    def valid_proposal(duration, span):
-        real_span = min(duration, span[1]) - span[0]
-        return real_span >= 1
-
-    pr = list(filter(lambda x: valid_proposal(duration, x), pr))
-    return pr
-
-
-def temporal_nms(bboxes, thresh, score_ind=3):
-    """
-    One-dimensional non-maximal suppression
-    :param bboxes: [[st, ed, cls, score], ...]
-    :param thresh:
-    :return:
-    """
-    if not nms:
-        return temporal_nms_fallback(bboxes, thresh, score_ind=score_ind)
-    else:
-        keep = nms(np.array([[x[0], x[1], x[3]] for x in bboxes]), thresh, device_id=0)
-        return [bboxes[i] for i in keep]
-
-
-def temporal_nms_fallback(bboxes, thresh, score_ind=3):
-    """
-    One-dimensional non-maximal suppression
-    :param bboxes: [[st, ed, cls, score], ...]
-    :param thresh:
-    :return:
-    """
-    t1 = np.array([x[0] for x in bboxes])
-    t2 = np.array([x[1] for x in bboxes])
-    scores = np.array([x[score_ind] for x in bboxes])
-
-    durations = t2 - t1 + 1
-    order = scores.argsort()[::-1]
-
-    keep = []
-    while order.size > 0:
-        i = order[0]
-        keep.append(i)
-        tt1 = np.maximum(t1[i], t1[order[1:]])
-        tt2 = np.minimum(t2[i], t2[order[1:]])
-        intersection = tt2 - tt1 + 1
-        IoU = intersection / (durations[i] + durations[order[1:]] - intersection).astype(float)
-
-        inds = np.where(IoU <= thresh)[0]
-        order = order[inds + 1]
-
-    return [bboxes[i] for i in keep]
-
-
-def build_box_by_search(frm_label_lst, tol, min=1):
-    boxes = []
-    for cls, frm_labels, frm_scores in frm_label_lst:
-        length = len(frm_labels)
-        diff = np.empty(length+1)
-        diff[1:-1] = frm_labels[1:].astype(int) - frm_labels[:-1].astype(int)
-        diff[0] = float(frm_labels[0])
-        diff[length] = 0 - float(frm_labels[-1])
-        cs = np.cumsum(1 - frm_labels)
-        offset = np.arange(0, length, 1)
-
-        up = np.nonzero(diff == 1)[0]
-        down = np.nonzero(diff == -1)[0]
-
-        assert len(up) == len(down), "{} != {}".format(len(up), len(down))
-        for i, t in enumerate(tol):
-            signal = cs - t * offset
-            for x in range(len(up)):
-                s = signal[up[x]]
-                for y in range(x + 1, len(up)):
-                    if y < len(down) and signal[up[y]] > s:
-                        boxes.append((up[x], down[y-1]+1, cls, sum(frm_scores[up[x]:down[y-1]+1])))
-                        break
-                else:
-                    boxes.append((up[x], down[-1] + 1, cls, sum(frm_scores[up[x]:down[-1] + 1])))
-
-            for x in range(len(down) - 1, -1, -1):
-                s = signal[down[x]] if down[x] < length else signal[-1] - t
-                for y in range(x - 1, -1, -1):
-                    if y >= 0 and signal[down[y]] < s:
-                        boxes.append((up[y+1], down[x] + 1, cls, sum(frm_scores[up[y+1]:down[x] + 1])))
-                        break
-                else:
-                    boxes.append((up[0], down[x] + 1, cls, sum(frm_scores[0:down[x]+1 + 1])))
-
-    return boxes
diff --git a/utils/thumos_db.py b/utils/thumos_db.py
deleted file mode 100644
index a8673b6..0000000
--- a/utils/thumos_db.py
+++ /dev/null
@@ -1,266 +0,0 @@
-#from .utils import *
-import os
-import glob
-
-
-class Instance(object):
-    """
-    Representing an instance of activity in the videos
-    """
-
-    def __init__(self, idx, anno, vid_id, vid_info, name_num_mapping):
-        self._starting, self._ending = anno['segment'][0], anno['segment'][1]
-        self._str_label = anno['label']
-        self._total_duration = vid_info['duration']
-        self._idx = idx
-        self._vid_id = vid_id
-        self._file_path = None
-
-        if name_num_mapping:
-            self._num_label = name_num_mapping[self._str_label]
-
-    @property
-    def time_span(self):
-        return self._starting, self._ending
-
-    @property
-    def covering_ratio(self):
-        return self._starting / float(self._total_duration), self._ending / float(self._total_duration)
-
-    @property
-    def num_label(self):
-        return self._num_label
-
-    @property
-    def label(self):
-        return self._str_label
-
-    @property
-    def name(self):
-        return '{}_{}'.format(self._vid_id, self._idx)
-
-    @property
-    def path(self):
-        if self._file_path is None:
-            raise ValueError("This instance is not associated to a file on disk. Maybe the file is missing?")
-        return self._file_path
-
-    @path.setter
-    def path(self, path):
-        self._file_path = path
-
-
-class Video(object):
-    """
-    This class represents one video in the thumos db
-    """
-    def __init__(self, key, info, name_idx_mapping=None):
-        self._id = key
-        self._info_dict = info
-        self._instances = [Instance(i, x, self._id, self._info_dict, name_idx_mapping)
-                           for i, x in enumerate(self._info_dict['annotations'])]
-        self._file_path = None
-
-    @property
-    def id(self):
-        return self._id
-
-    @property
-    def url(self):
-        return self._info_dict['url']
-
-    @property
-    def instances(self):
-        return self._instances
-
-    @property
-    def duration(self):
-        return self._info_dict['duration']
-
-    @property
-    def subset(self):
-        return self._info_dict['subset']
-
-    @property
-    def instance(self):
-        return self._instances
-
-    @property
-    def path(self):
-        if self._file_path is None:
-            raise ValueError("This video is not associated to a file on disk. Maybe the file is missing?")
-        return self._file_path
-
-    @path.setter
-    def path(self, path):
-        self._file_path = path
-
-
-class THUMOSDB(object):
-    """
-    This class is the abstraction of the thumos db
-    """
-
-    _CONSTRUCTOR_LOCK = object()
-
-    def __init__(self, token):
-        """
-        Disabled constructor
-        :param token:
-        :return:
-        """
-        if token is not self._CONSTRUCTOR_LOCK:
-            raise ValueError("Use get_db to construct an instance, do not directly use the constructor")
-
-    @classmethod
-    def get_db(cls, year=14):
-        """
-        Build the internal representation of the THUMOS databases.
-        We use alphabetical order to map each label string to its numerical index for learning.
-        :param year:
-        :return:
-        """
-        if year not in [14, 15]:
-            raise ValueError("Unsupported challenge year {}".format(year))
-
-        import os
-        db_info_folder = 'data/thumos_{}'.format(year)
-
-        me = cls(cls._CONSTRUCTOR_LOCK)
-        me.year = year
-        me.ignore_labels = ['Ambiguous']
-        me.prepare_data(db_info_folder)
-
-        return me
-
-    def prepare_data(self, db_folder):
-
-        def load_subset_info(subset):
-            duration_file = '{}_durations.txt'.format(subset)
-            annotation_folder = 'temporal_annotations_{}'.format(subset)
-            annotation_files = glob.glob(os.path.join(db_folder, annotation_folder, '*'))
-            avoid_file = '{}_avoid_videos.txt'.format(subset)
-
-            durations_lines = [x.strip() for x in open(os.path.join(db_folder, duration_file))]
-            annotaion_list = [(os.path.basename(f).split('_')[0], list(open(f))) for f in annotation_files]
-            avoid_list = [x.strip().split() for x in open(os.path.join(db_folder, avoid_file))]
-
-            avoid_set = set(['-'.join(x) for x in avoid_list])
-            print("Loading avoid set:")
-            print(avoid_set)
-
-            # process video info
-            video_names = [durations_lines[i].split('.')[0] for i in range(0, len(durations_lines), 2)]
-            video_durations = [durations_lines[i] for i in range(1, len(durations_lines), 2)]
-            video_info = list(zip(video_names, video_durations))
-
-            duration_dict = dict(video_info)
-
-            # reorganize annotations to attach them to videos
-            video_table = {v: list() for v in video_names}
-            for cls_name, annotations in annotaion_list:
-                for a in annotations:
-                    items = a.strip().split()
-                    vid = items[0]
-                    st, ed = float(items[1]), float(items[2])
-                    if ('{}-{}'.format(vid, cls_name) not in avoid_set) and (st <= float(duration_dict[vid])):
-                        video_table[vid].append((cls_name, st, ed))
-
-            return video_info, video_table, annotation_files
-
-        def construct_video_dict(video_info, annotaion_table, subset, name_idx_mapping):
-            video_dict = {}
-            instance_dict = {}
-            for v in video_info:
-                info_dict = {
-                    'duration': float(v[1]),
-                    'subset': subset,
-                    'url': None,
-                    'annotations': [
-                        {'label': item[0], 'segment': (item[1], item[2])} for item in annotaion_table[v[0]] if item[0] not in self.ignore_labels
-                    ]
-                }
-                video_dict[v[0]] = Video(v[0], info_dict, name_idx_mapping)
-                instance_dict.update({i.name: i for i in video_dict[v[0]].instance})
-            return video_dict, instance_dict
-
-        self._validation_info = load_subset_info('validation')
-        self._test_info = load_subset_info('test')
-
-        self._parse_taxonomy()
-        self._validation_dict, self._validation_inst_dict = construct_video_dict(self._validation_info[0], self._validation_info[1],
-                                                                                 'validation', self._name_idx_table)
-        self._test_dict, self._test_inst_dict = construct_video_dict(self._test_info[0], self._test_info[1],
-                                                                     'test', self._name_idx_table)
-        self._video_dict = dict(list(self._validation_dict.items()) + list(self._test_dict.items()))
-
-    def get_subset_videos(self, subset_name):
-        if subset_name == 'validation':
-            return self._validation_dict.values()
-        elif subset_name == 'test':
-            return self._test_dict.values()
-        else:
-            raise ValueError("Unknown subset {}".format(subset_name))
-
-    def get_subset_instance(self, subset_name):
-        if subset_name == 'test':
-            return self._test_inst_dict.values()
-        elif subset_name == 'validation':
-            return self._validation_inst_dict.values()
-        else:
-            raise ValueError("Unknown subset {}".format(subset_name))
-
-    def get_ordered_label_list(self):
-        return [self._idx_name_table[x] for x in sorted(self._idx_name_table.keys())]
-
-    def _parse_taxonomy(self):
-        """
-        This function just parses the taxonomy file.
-        It gives alphabetically ordered indices to the classes in the competition.
-        :return:
-        """
-        validation_names = sorted([os.path.split(x)[1].split('_')[0] for x in self._validation_info[-1]])
-        test_names = sorted([os.path.split(x)[1].split('_')[0] for x in self._test_info[-1]])
-
-        if len(validation_names) != len(test_names):
-            raise IOError('Validation and test sets have different numbers of classes: {} v.s. {}'.format(
-                len(validation_names), len(test_names)))
-
-        final_names = []
-        for i in range(len(validation_names)):
-            if validation_names[i] != test_names[i]:
-                raise IOError('Validation and test sets have different class names: {} v.s. {}'.format(
-                    validation_names[i], test_names[i]))
-
-            if validation_names[i] not in self.ignore_labels:
-                final_names.append(validation_names[i])
-
-        sorted_names = sorted(final_names)
-
-        self._idx_name_table = {i: e for i, e in enumerate(sorted_names)}
-        self._name_idx_table = {e: i for i, e in enumerate(sorted_names)}
-        print("Got {} classes for the year {}".format(len(self._idx_name_table), self.year))
-
-    def try_load_file_path(self, frame_path):
-        """
-        Simple version of path finding
-        :return:
-        """
-        import glob
-        import os
-        folders = glob.glob(os.path.join(frame_path, '*'))
-        ids = [os.path.split(name)[-1] for name in folders]
-
-        folder_dict = dict(zip(ids, folders))
-
-        cnt = 0
-        for k in self._video_dict.keys():
-            if k in folder_dict:
-                self._video_dict[k].path = folder_dict[k]
-                cnt += 1
-        print("loaded {} video folders".format(cnt))
-
-
-if __name__ == '__main__':
-    db = THUMOSDB.get_db()
-    db.try_load_file_path('/mnt/SSD/THUMOS14/THUMOS14_extracted/')
diff --git a/utils/video_funcs.py b/utils/video_funcs.py
deleted file mode 100644
index da99462..0000000
--- a/utils/video_funcs.py
+++ /dev/null
@@ -1,82 +0,0 @@
-"""
-This module provides our implementation of different functions to do video-level classification and stream fusion
-"""
-import numpy as np
-from .metrics import softmax
-
-
-def default_aggregation_func(score_arr, normalization=True, crop_agg=None):
-    """
-    This is the default function for making video-level predictions
-    :param score_arr: a 3-dim array with (frame, crop, class) layout
-    :return:
-    """
-    crop_agg = np.mean if crop_agg is None else crop_agg
-    if normalization:
-        return softmax(crop_agg(score_arr, axis=1).mean(axis=0))
-    else:
-        return crop_agg(score_arr, axis=1).mean(axis=0)
-
-
-def top_k_aggregation_func(score_arr, k, normalization=True, crop_agg=None):
-    crop_agg = np.mean if crop_agg is None else crop_agg
-    if normalization:
-        return softmax(np.sort(crop_agg(score_arr, axis=1), axis=0)[-k:, :].mean(axis=0))
-    else:
-        return np.sort(crop_agg(score_arr, axis=1), axis=0)[-k:, :].mean(axis=0)
-
-
-def sliding_window_aggregation_func(score, spans=[1, 2, 4, 8, 16], overlap=0.2, norm=True, fps=1):
-    """
-    This is the aggregation function used for the ActivityNet Challenge 2016
-    :param score:
-    :param spans:
-    :param overlap:
-    :param norm:
-    :param fps:
-    :return:
-    """
-    frm_max = score.mean(axis=1)
-    slide_score = []
-
-    def top_k_pool(scores, k):
-        return np.sort(scores, axis=0)[-k:, :].mean(axis=0)
-
-    for t_span in spans:
-        span = t_span * fps
-        step = int(np.ceil(span * (1-overlap)))
-        local_agg = [frm_max[i: i+span].max(axis=0) for i in xrange(0, frm_max.shape[0], step)]
-        k = max(15, len(local_agg)/4)
-        slide_score.append(top_k_pool(np.array(local_agg), k))
-
-    out_score = np.mean(slide_score, axis=0)
-
-    if norm:
-        return softmax(out_score)
-    else:
-        return out_score
-
-
-def tpp_aggregation_func(score, num_class):
-    crop_avg = score.mean(axis=1)
-    stage = crop_avg.shape[1] / num_class
-    length = score.shape[0]
-    step = float(stage) / length
-    out = np.zeros(num_class)
-    for t in xrange(length):
-        k = int(t * step)
-        out += crop_avg[t, k * num_class: (k+1)*num_class]
-
-    return out / length
-
-
-def default_fusion_func(major_score, other_scores, fusion_weights, norm=True):
-    assert len(other_scores) == len(fusion_weights)
-    out_score = major_score
-    for s, w in zip(other_scores, fusion_weights):
-        out_score += s * w
-
-    if norm:
-        return softmax(out_score)
-    else:
-        return out_score
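
Migration note: this patch only removes the old copies of these modules; per the
commit message they now live under ops. A minimal sketch of the matching import
change follows, assuming the module and symbol names are kept unchanged under
ops/ -- the ops side is not shown by this patch, so treat these paths as
assumptions to verify against the actual package:

    # Hypothetical import migration after the utils -> ops merge.
    # Before (removed by this patch):
    #     from utils.metrics import softmax, mean_class_accuracy
    #     from utils.anet_db import ANetDB
    # After (assumed new location):
    from ops.metrics import softmax, mean_class_accuracy
    from ops.anet_db import ANetDB

    # The classes themselves are unchanged by the move, so existing call
    # sites should only need the import path updated.
    db = ANetDB.get_db(version="1.2")
    print(db.get_ordered_label_list()[:5])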