tune dcfnet

foolwood · foolwood · commit 1450e2076f2c · 2018-05-10T01:24:17.000-04:00
diff --git a/track/DCFNet.py b/track/DCFNet.py
@@ -83,7 +83,7 @@ def track(self, im):
         else:
             response = self.net(torch.Tensor(search))
         peak, idx = torch.max(response.view(self.config.num_scale, -1), 1)
-        peak = peak.data.numpy() * self.config.scale_factor
+        peak = peak.data.cpu().numpy() * self.config.scale_penalties
         best_scale = np.argmax(peak)
         r_max, c_max = np.unravel_index(idx[best_scale], self.config.net_input_size)
 
@@ -108,9 +108,6 @@ def track(self, im):
 
 if __name__ == '__main__':
     # base dataset path and setting
-    raw_data_path = '/media/sensetime/memo/OTB2015'
-    if not isdir(raw_data_path):
-        raw_data_path = '/data1/qwang/OTB100'
     dataset = 'OTB2015'
     base_path = join('dataset', dataset)
     json_path = join('dataset', dataset + '.json')
@@ -131,7 +128,7 @@ def track(self, im):
     for video_id, video in enumerate(videos):  # run without resetting
         video_path_name = annos[video]['name']
         init_rect = np.array(annos[video]['init_rect']).astype(np.float)
-        image_files = [join(raw_data_path, video_path_name, 'img', im_f) for im_f in annos[video]['image_files']]
+        image_files = [join(base_path, video_path_name, 'img', im_f) for im_f in annos[video]['image_files']]
         n_images = len(image_files)
 
         target_pos, target_sz = rect1_2_cxy_wh(init_rect)  # OTB label is 1-indexed
@@ -168,9 +165,9 @@ def track(self, im):
                 # cv2.waitKey(0)
 
             search = patch_crop - config.net_average_image
-            response = net(torch.Tensor(search).cuda())
+            response = net(torch.Tensor(search).cuda()).cpu()
             peak, idx = torch.max(response.view(config.num_scale, -1), 1)
-            peak = peak.cpu().data.numpy() * config.scale_penalties
+            peak = peak.data.numpy() * config.scale_penalties
             best_scale = np.argmax(peak)
             r_max, c_max = np.unravel_index(idx[best_scale], config.net_input_size)
 
@@ -212,4 +209,4 @@ def track(self, im):
             for x in res:
                 f.write(','.join(['{:.2f}'.format(i) for i in x]) + '\n')
 
-    eval_auc('OTB2015', 'DCFNet_test', 0, 1)
+    eval_auc('OTB2015', 'DCFNet_test', 0, 1)
diff --git a/track/tune_otb.py b/track/tune_otb.py
@@ -0,0 +1,127 @@
+import argparse
+import cv2
+import numpy as np
+from os import makedirs
+from os.path import isfile, isdir, join
+from util import cxy_wh_2_rect1
+import torch
+import json
+from DCFNet import *
+
+parser = argparse.ArgumentParser(description='Tune parameters for DCFNet tracker on OTB2015')
+parser.add_argument('-v', '--visualization', dest='visualization', action='store_true',
+                    help='whether visualize result')
+
+args = parser.parse_args()
+
+
+def tune_otb(param):
+    """Track one video with one hyper-parameter setting and save the regions.
+
+    Args:
+        param: dict read for the keys 'dataset', 'network_name', 'config'
+            (attributes scale_step, scale_penalty, interp_factor), 'video',
+            'v', 'ims' (frames) and 'init_rect' (first-frame region).
+
+    The regions are written, one comma-separated line per frame, to
+    result/<dataset>/<network_name>_<hyper-params>/<video>.txt. If that file
+    already exists the call returns without tracking.
+    """
+    # save result
+    benchmark_result_path = join('result', param['dataset'])
+    tracker_path = join(benchmark_result_path, (param['network_name'] +
+                        '_scale_step_{:.3f}'.format(param['config'].scale_step) +
+                        '_scale_penalty_{:.3f}'.format(param['config'].scale_penalty) +
+                        '_interp_factor_{:.3f}'.format(param['config'].interp_factor)))
+    result_path = join(tracker_path, '{:s}.txt'.format(param['video']))
+    if isfile(result_path):
+        return
+    if not isdir(tracker_path):
+        makedirs(tracker_path)
+    with open(result_path, 'w'):  # Occupation: empty placeholder marks this run as taken
+        pass
+
+    ims = param['ims']
+    video = param['video']
+    regions = []  # one region per frame: init_rect, then each tracker.track() result
+    toc = 0
+    for f, im in enumerate(ims):
+        tic = cv2.getTickCount()
+        if f == 0:  # init
+            init_rect = param['init_rect']
+            tracker = DCFNetTraker(im, init_rect, config=param['config'])
+            regions.append(init_rect)
+        else:  # tracking
+            regions.append(tracker.track(im))
+        toc += cv2.getTickCount() - tic
+
+        if args.visualization:  # visualization
+            if f == 0:
+                cv2.destroyAllWindows()
+            location = [int(l) for l in regions[-1]]  # latest region, indexed as x, y, w, h below
+            canvas = im.copy()  # draw on a copy: the frames in param['ims'] are reused by later runs
+            cv2.rectangle(canvas, (location[0], location[1]), (location[0] + location[2], location[1] + location[3]), (0, 255, 255), 3)
+            cv2.putText(canvas, str(f), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
+
+            cv2.imshow(video, canvas)
+            cv2.waitKey(1)
+    toc /= cv2.getTickFrequency()
+    fps = len(ims) / toc if toc > 0 else 0.0  # frame count, not the last index; no division by zero
+    print('{:2d} Video: {:12s} Time: {:2.1f}s Speed: {:3.1f}fps'.format(param['v'], video, toc, fps))
+    regions = np.array(regions)
+    if len(regions):
+        regions[:, :2] += 1  # 1-index
+    with open(result_path, 'w') as f:
+        for x in regions:
+            f.write(','.join(['{:.2f}'.format(i) for i in x]) + '\n')
+
+
+params = {'dataset': ['OTB2015'], 'network': ['param.pth'],
+          'scale_step': np.arange(1.01, 1.05, 0.005, np.float32),
+          # NOTE(review): the 0.025 step is wider than the 0.98-1.0 range, so this grid is just [0.98] - confirm
+          'scale_penalty': np.arange(0.98, 1.0, 0.025, np.float32),
+          'interp_factor': np.arange(0.001, 0.015, 0.001, np.float32)}
+
+p = dict()
+p['config'] = TrackerConfig()
+for network in params['network']:
+    p['network_name'] = network
+    np.random.shuffle(params['dataset'])
+    for dataset in params['dataset']:
+        base_path = join('dataset', dataset)
+        json_path = join('dataset', dataset+'.json')
+        with open(json_path, 'r') as anno_file:  # close the annotation file after loading
+            annos = json.load(anno_file)
+        videos = list(annos.keys())  # a real list: np.random.shuffle cannot permute a dict view
+        p['dataset'] = dataset
+        np.random.shuffle(videos)
+        for v, video in enumerate(videos):
+            p['v'] = v
+            p['video'] = video
+            video_path_name = annos[video]['name']
+            init_rect = np.array(annos[video]['init_rect']).astype(float)
+            image_files = [join(base_path, video_path_name, 'img', im_f) for im_f in annos[video]['image_files']]
+            ims = []
+            for image_file in image_files:
+                im = cv2.imread(image_file)
+                if im is None:  # cv2.imread returns None instead of raising
+                    raise IOError('cannot read image: {:s}'.format(image_file))
+                if im.ndim == 2 or im.shape[2] == 1:  # promote gray frames to 3 channels
+                    im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
+                ims.append(im)
+            p['ims'] = ims
+            p['init_rect'] = init_rect
+
+            np.random.shuffle(params['scale_step'])
+            np.random.shuffle(params['scale_penalty'])
+            np.random.shuffle(params['interp_factor'])
+            # NOTE(review): DCFNetTraker.track scales peaks by config.scale_penalties (see the
+            # DCFNet.py hunk of this patch). Whether assigning scale_step / scale_penalty below
+            # also refreshes such derived values depends on TrackerConfig, not visible here - confirm.
+            for scale_step in params['scale_step']:
+                for scale_penalty in params['scale_penalty']:
+                    for interp_factor in params['interp_factor']:
+                        p['config'].scale_step = float(scale_step)
+                        p['config'].scale_penalty = float(scale_penalty)
+                        p['config'].interp_factor = float(interp_factor)
+                        tune_otb(p)