nuScenes tracking evaluation improvements (nutonomy#251)
* Remove lap solver, which is slower and causes issues in continuous integration

* Switch to 20-point evaluation for more accurate results

* Switch to 40-point evaluation

* Remove unused config fields

* Make test_algo independent of the number of recall thresholds

* Update test target values after changing number of recall thresholds

* Optimize memory usage by freeing up the NuScenes object

* Update README to reflect val results and 40-point interpolation
holger-motional authored and oscar-nutonomy committed Nov 5, 2019
1 parent 0e32541 commit a093f3a
Showing 7 changed files with 52 additions and 53 deletions.
10 changes: 5 additions & 5 deletions python-sdk/nuscenes/eval/tracking/README.md
@@ -198,8 +198,8 @@ The matching threshold (center distance) is 2m.

### AMOTA and AMOTP metrics
Our main metrics are the AMOTA and AMOTP metrics developed in \[2\].
- These are integrals over the MOTA/MOTP curves using `n`-point interpolation (`n` to be determined).
- Similar to the detection challenge, we drop points with `recall < 0.1` (not shown in the equation), as these are typically noisy.
+ These are integrals over the MOTA/MOTP curves using `n`-point interpolation (`n = 40`).
+ Similar to the detection challenge, we do not include points with `recall < 0.1` (not shown in the equation), as these are typically noisy.

- **AMOTA** (average multi object tracking accuracy):
Average over the MOTA \[3\] metric (see below) at different recall thresholds.
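As a quick illustration of the `n = 40` interpolation described above: the metric is evaluated at evenly spaced recall thresholds and averaged, with unachieved thresholds scored at a configured worst-case value (see `metric_worst` in the config change below). A minimal sketch, not the devkit's implementation; the names and the NaN convention are assumptions:

```python
import numpy as np

# 40 evenly spaced recall thresholds; points with recall < 0.1 are excluded.
recall_thresholds = np.linspace(0.1, 1.0, 40)

def average_over_recall(metric_at_thresholds: np.ndarray, worst: float) -> float:
    """Average a per-threshold metric, scoring unachieved (NaN) thresholds as worst."""
    values = np.where(np.isnan(metric_at_thresholds), worst, metric_at_thresholds)
    return float(np.mean(values))
```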
@@ -261,9 +261,9 @@ Note that these numbers are measured on the val split and therefore not identical

| Method | NDS | mAP | AMOTA | AMOTP | Modality | Detections download | Tracking download |
| --- | --- | --- | --- | --- | --- | --- | --- |
- | Megvii \[6\] | 62.8 | 51.9 | 28.2 | 1.49 | Lidar | [link](https://www.nuscenes.org/data/detection-megvii.zip) | [link](https://www.nuscenes.org/data/tracking-megvii.zip) |
- | PointPillars \[5\] | 44.8 | 29.5 | 6.9 | 1.69 | Lidar | [link](https://www.nuscenes.org/data/detection-pointpillars.zip) | [link](https://www.nuscenes.org/data/tracking-pointpillars.zip) |
- | Mapillary \[7\] | 36.9 | 29.8 | 8.2 | 1.77 | Camera | [link](https://www.nuscenes.org/data/detection-mapillary.zip) | [link](https://www.nuscenes.org/data/tracking-mapillary.zip) |
+ | Megvii \[6\] | 62.8 | 51.9 | 27.9 | 1.50 | Lidar | [link](https://www.nuscenes.org/data/detection-megvii.zip) | [link](https://www.nuscenes.org/data/tracking-megvii.zip) |
+ | PointPillars \[5\] | 44.8 | 29.5 | 13.1 | 1.69 | Lidar | [link](https://www.nuscenes.org/data/detection-pointpillars.zip) | [link](https://www.nuscenes.org/data/tracking-pointpillars.zip) |
+ | Mapillary \[7\] | 36.9 | 29.8 | 10.3 | 1.79 | Camera | [link](https://www.nuscenes.org/data/detection-mapillary.zip) | [link](https://www.nuscenes.org/data/tracking-mapillary.zip) |

#### Overfitting
Some object detection methods overfit to the training data.
python-sdk/nuscenes/eval/tracking/configs/tracking_nips_2019.json

@@ -9,10 +9,8 @@
"bicycle": 40
},
"dist_fcn": "center_distance",
"dist_ths": [0.5, 1.0, 2.0, 4.0],
"dist_th_tp": 2.0,
"min_recall": 0.1,
"min_precision": 0.1,
"max_boxes_per_sample": 500,
"metric_worst": {
"amota": 0.0,
@@ -33,5 +31,5 @@
"tid": 20,
"lgd": 20
},
"num_thresholds": 10
"num_thresholds": 40
}
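For reference, a short sketch of loading the updated config through the devkit's `TrackingConfig` (see the class change below); the file path is illustrative:

```python
import json
from nuscenes.eval.tracking.data_classes import TrackingConfig

# Load the slimmed-down config; the removed dist_ths and min_precision
# keys are simply no longer read. Path is illustrative.
with open('tracking_nips_2019.json', 'r') as f:
    cfg = TrackingConfig.deserialize(json.load(f))

assert cfg.num_thresholds == 40
```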
9 changes: 0 additions & 9 deletions python-sdk/nuscenes/eval/tracking/data_classes.py
@@ -16,23 +16,18 @@ class TrackingConfig:
def __init__(self,
class_range: Dict[str, int],
dist_fcn: str,
- dist_ths: List[float],
dist_th_tp: float,
min_recall: float,
- min_precision: float,
max_boxes_per_sample: float,
metric_worst: Dict[str, float],
num_thresholds: int):

assert set(class_range.keys()) == set(TRACKING_NAMES), "Class count mismatch."
- assert dist_th_tp in dist_ths, "dist_th_tp must be in set of dist_ths."

self.class_range = class_range
self.dist_fcn = dist_fcn
- self.dist_ths = dist_ths
self.dist_th_tp = dist_th_tp
self.min_recall = min_recall
- self.min_precision = min_precision
self.max_boxes_per_sample = max_boxes_per_sample
self.metric_worst = metric_worst
self.num_thresholds = num_thresholds
@@ -52,10 +47,8 @@ def serialize(self) -> dict:
return {
'class_range': self.class_range,
'dist_fcn': self.dist_fcn,
- 'dist_ths': self.dist_ths,
'dist_th_tp': self.dist_th_tp,
'min_recall': self.min_recall,
- 'min_precision': self.min_precision,
'max_boxes_per_sample': self.max_boxes_per_sample,
'metric_worst': self.metric_worst,
'num_thresholds': self.num_thresholds
@@ -66,10 +59,8 @@ def deserialize(cls, content: dict):
""" Initialize from serialized dictionary. """
return cls(content['class_range'],
content['dist_fcn'],
- content['dist_ths'],
content['dist_th_tp'],
content['min_recall'],
- content['min_precision'],
content['max_boxes_per_sample'],
content['metric_worst'],
content['num_thresholds'])
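After this change, a `TrackingConfig` is constructed without `dist_ths` and `min_precision`. A minimal sketch of the slimmed constructor; the `class_range` and `metric_worst` values below are illustrative, not the official ones:

```python
from nuscenes.eval.tracking.constants import TRACKING_NAMES
from nuscenes.eval.tracking.data_classes import TrackingConfig

cfg = TrackingConfig(
    class_range={name: 40 for name in TRACKING_NAMES},  # illustrative ranges
    dist_fcn='center_distance',
    dist_th_tp=2.0,
    min_recall=0.1,
    max_boxes_per_sample=500,
    metric_worst={'amota': 0.0, 'tid': 20, 'lgd': 20},  # truncated; full set in the JSON config
    num_thresholds=40)
```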
17 changes: 11 additions & 6 deletions python-sdk/nuscenes/eval/tracking/evaluate.py
@@ -37,19 +37,21 @@ class TrackingEval:
Please see https://www.nuscenes.org/tracking for more details.
"""
def __init__(self,
- nusc: NuScenes,
config: TrackingConfig,
result_path: str,
eval_set: str,
- output_dir: str = None,
+ output_dir: str,
+ nusc_version: str,
+ nusc_dataroot: str,
verbose: bool = True):
"""
Initialize a TrackingEval object.
- :param nusc: A NuScenes object.
:param config: A TrackingConfig object.
:param result_path: Path of the nuScenes JSON result file.
:param eval_set: The dataset split to evaluate on, e.g. train, val or test.
:param output_dir: Folder to save plots and results to.
+ :param nusc_version: The version of the NuScenes dataset.
+ :param nusc_dataroot: Path of the nuScenes dataset on disk.
:param verbose: Whether to print to stdout.
"""
self.cfg = config
@@ -68,6 +70,10 @@ def __init__(self,
if not os.path.isdir(self.plot_dir):
os.makedirs(self.plot_dir)

+ # Initialize NuScenes object.
+ # We do not store it in self to let garbage collection take care of it and save memory.
+ nusc = NuScenes(version=nusc_version, verbose=verbose, dataroot=nusc_dataroot)

# Load data.
if verbose:
print('Initializing nuScenes tracking evaluation')
@@ -252,7 +258,6 @@ def main(self, render_curves: bool = True) -> TrackingMetrics:
with open(config_path, 'r') as _f:
cfg_ = TrackingConfig.deserialize(json.load(_f))

- nusc_ = NuScenes(version=version_, verbose=verbose_, dataroot=dataroot_)
- nusc_eval = TrackingEval(nusc_, config=cfg_, result_path=result_path_, eval_set=eval_set_,
-                          output_dir=output_dir_, verbose=verbose_)
+ nusc_eval = TrackingEval(config=cfg_, result_path=result_path_, eval_set=eval_set_, output_dir=output_dir_,
+                          nusc_version=version_, nusc_dataroot=dataroot_, verbose=verbose_)
nusc_eval.main(render_curves=render_curves_)
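The new entry point takes the dataset version and root instead of a pre-built `NuScenes` object. A sketch of the updated call (the result, output, and data paths are illustrative):

```python
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.tracking.evaluate import TrackingEval

# Build the standard tracking config and run the evaluation; the NuScenes
# object is now created (and garbage-collected) inside TrackingEval.
cfg = config_factory('tracking_nips_2019')
nusc_eval = TrackingEval(config=cfg,
                         result_path='results/tracking_results.json',  # illustrative
                         eval_set='val',
                         output_dir='results/eval',                    # illustrative
                         nusc_version='v1.0-trainval',
                         nusc_dataroot='/data/sets/nuscenes',          # illustrative
                         verbose=True)
metrics = nusc_eval.main(render_curves=False)
```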
53 changes: 28 additions & 25 deletions python-sdk/nuscenes/eval/tracking/tests/test_algo.py
@@ -71,8 +71,8 @@ def test_empty_submission(self):

# Remove all predictions.
timestamp_boxes_pred = copy.deepcopy(tracks_gt['scene-1'])
- for id, box in timestamp_boxes_pred.items():
-     timestamp_boxes_pred[id] = []
+ for timestamp, box in timestamp_boxes_pred.items():
+     timestamp_boxes_pred[timestamp] = []
tracks_pred = {'scene-1': timestamp_boxes_pred}

# Accumulate metrics.
@@ -116,14 +116,15 @@ def test_drop_prediction(self):

# Check outputs.
# Recall values above 0.75 (3/4 correct) are not achieved and therefore nan.
- assert np.all(np.isnan(md.confidence[md.recall_hypo > 0.75]))
- assert md.tp[3] == 3
- assert md.fp[3] == 0
- assert md.fn[3] == 1
- assert md.lgd[3] == 0.5
- assert md.tid[3] == 0
- assert md.frag[3] == 1
- assert md.ids[3] == 0
+ first_achieved = np.where(md.recall_hypo <= 0.75)[0][0]
+ assert np.all(np.isnan(md.confidence[:first_achieved]))
+ assert md.tp[first_achieved] == 3
+ assert md.fp[first_achieved] == 0
+ assert md.fn[first_achieved] == 1
+ assert md.lgd[first_achieved] == 0.5
+ assert md.tid[first_achieved] == 0
+ assert md.frag[first_achieved] == 1
+ assert md.ids[first_achieved] == 0

def test_drop_prediction_multiple(self):
""" Drop the first three predictions from the GT submission. """
@@ -150,14 +151,15 @@

# Check outputs.
# Recall values above 0.25 (1/4 correct) are not achieved and therefore nan.
- assert np.all(np.isnan(md.confidence[md.recall_hypo > 0.25]))
- assert md.tp[8] == 1
- assert md.fp[8] == 0
- assert md.fn[8] == 3
- assert md.lgd[8] == 3 * 0.5
- assert md.tid[8] == 3 * 0.5
- assert md.frag[8] == 0
- assert md.ids[8] == 0
+ first_achieved = np.where(md.recall_hypo <= 0.25)[0][0]
+ assert np.all(np.isnan(md.confidence[:first_achieved]))
+ assert md.tp[first_achieved] == 1
+ assert md.fp[first_achieved] == 0
+ assert md.fn[first_achieved] == 3
+ assert md.lgd[first_achieved] == 3 * 0.5
+ assert md.tid[first_achieved] == 3 * 0.5
+ assert md.frag[first_achieved] == 0
+ assert md.ids[first_achieved] == 0

def test_identity_switch(self):
""" Change the tracking_id of one frame from the GT submission. """
@@ -181,13 +183,14 @@
md = ev.accumulate()

# Check outputs.
- assert md.tp[5] == 2
- assert md.fp[5] == 0
- assert md.fn[5] == 0
- assert md.lgd[5] == 0
- assert md.tid[5] == 0
- assert md.frag[5] == 0
- assert md.ids[5] == 2  # One wrong id leads to 2 identity switches.
+ first_achieved = np.where(md.recall_hypo <= 0.5)[0][0]
+ assert md.tp[first_achieved] == 2
+ assert md.fp[first_achieved] == 0
+ assert md.fn[first_achieved] == 0
+ assert md.lgd[first_achieved] == 0
+ assert md.tid[first_achieved] == 0
+ assert md.frag[first_achieved] == 0
+ assert md.ids[first_achieved] == 2  # One wrong id leads to 2 identity switches.

def test_drop_gt(self):
""" Drop one box from the GT. """
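The recurring pattern in the tests above replaces hard-coded threshold indices with a lookup of the first achieved recall threshold. A standalone sketch of that lookup, assuming (as the `confidence[:first_achieved]` checks imply) that `recall_hypo` is sorted in descending order:

```python
import numpy as np

# Illustrative 40-point recall grid, highest threshold first.
recall_hypo = np.linspace(1.0, 0.1, 40)

# Index of the first threshold the tracker actually reaches; thresholds
# before it were never achieved, so their confidences are NaN.
first_achieved = np.where(recall_hypo <= 0.75)[0][0]
```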
11 changes: 7 additions & 4 deletions python-sdk/nuscenes/eval/tracking/tests/test_evaluate.py
@@ -163,8 +163,8 @@ def basic_test(self,
json.dump(mock, f, indent=2)

cfg = config_factory('tracking_nips_2019')
- nusc_eval = TrackingEval(nusc, cfg, self.res_mockup, eval_set=eval_set, output_dir=self.res_eval_folder,
-                          verbose=False)
+ nusc_eval = TrackingEval(cfg, self.res_mockup, eval_set=eval_set, output_dir=self.res_eval_folder,
+                          nusc_version=version, nusc_dataroot=os.environ['NUSCENES'], verbose=False)
metrics = nusc_eval.main(render_curves=render_curves)

return metrics
@@ -184,8 +184,11 @@ def test_delta_mock(self,

# Compare metrics to known solution.
if eval_set == 'mini_val':
- self.assertAlmostEqual(metrics.compute_metric('mota'), 0.24081829757545278)
- self.assertAlmostEqual(metrics.compute_metric('motp'), 1.2974351821696868)
+ self.assertAlmostEqual(metrics.compute_metric('amota'), 0.5383961573989436)
+ self.assertAlmostEqual(metrics.compute_metric('amotp'), 1.5275400961369252)
+ self.assertAlmostEqual(metrics.compute_metric('motar'), 0.8261827096838301)
+ self.assertAlmostEqual(metrics.compute_metric('mota'), 0.25003943918566174)
+ self.assertAlmostEqual(metrics.compute_metric('motp'), 1.2976508610883917)
else:
print('Skipping checks due to choice of custom eval_set: %s' % eval_set)

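Since `basic_test` now reads the dataset root from the `NUSCENES` environment variable instead of receiving a ready-made `NuScenes` object, that variable must be set before the tests run. A sketch, with an illustrative path:

```python
import os
import unittest

# Point the tests at a local nuScenes installation (illustrative path),
# then run the tracking evaluation test module.
os.environ['NUSCENES'] = '/data/sets/nuscenes'
unittest.main(module='nuscenes.eval.tracking.tests.test_evaluate')
```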
1 change: 0 additions & 1 deletion setup/requirements.txt
@@ -1,7 +1,6 @@
cachetools
descartes
jupyter
- lap
matplotlib
motmetrics
numpy
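With `lap` dropped from the requirements, the assignment step falls back to another solver; my understanding (an assumption, not stated in this commit) is that `motmetrics` can use SciPy's Hungarian implementation instead. A sketch of the equivalent matching:

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

# Illustrative cost matrix of GT-to-prediction center distances.
cost = np.array([[0.5, 2.0],
                 [1.5, 0.3]])

# Optimal one-to-one matching without the lap package.
rows, cols = linear_sum_assignment(cost)
print(rows, cols)  # [0 1] [0 1]
```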
