Add MOTSA metrics for panoptic tracking (nutonomy#646)
lubing-motional authored Aug 23, 2021
1 parent 448edbc commit 0cd6bbd
Showing 4 changed files with 77 additions and 15 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -21,7 +21,8 @@ Welcome to the devkit of the [nuScenes](https://www.nuscenes.org/nuscenes) and [
- [Citation](#citation)

## Changelog
- Jul. 29, 2020: Devkit v1.1.6: nuScenes-panoptic v1.0 code, NeurIPS challenge announcement.
- Aug. 23, 2021: Devkit v1.1.7: Add more panoptic tracking metrics to nuScenes-panoptic code.
- Jul. 29, 2021: Devkit v1.1.6: nuScenes-panoptic v1.0 code, NeurIPS challenge announcement.
- Apr. 5, 2021: Devkit v1.1.3: Bug fixes and pip requirements.
- Nov. 23, 2020: Devkit v1.1.2: Release map-expansion v1.3 with lidar basemap.
- Nov. 9, 2020: Devkit v1.1.1: Lidarseg evaluation code, NeurIPS challenge announcement.
40 changes: 29 additions & 11 deletions python-sdk/nuscenes/eval/panoptic/evaluate.py
@@ -99,7 +99,7 @@ def __init__(self,
if self.task == 'tracking':
self.scene_name2tok = {rec['name']: rec['token'] for rec in nusc.scene}
self.evaluator['tracking'] = PanopticTrackingEval(n_classes=self.num_classes,
min_stuff_cls_id=len(self.things)+1,
min_stuff_cls_id=len(self.things) + 1,
ignore=[self.ignore_idx],
min_points=self.min_inst_points)

@@ -123,7 +123,7 @@ def evaluate_segmentation(self) -> Dict[str, Any]:
Calculate panoptic segmentation metrics.
:return: A dict of panoptic metrics for mean of all classes and each class.
{
"all": { "PQ": float, "SQ": float, "RQ": float, "IoU": float, "PQ_dagger": float},
"all": { "PQ": float, "SQ": float, "RQ": float, "mIoU": float, "PQ_dagger": float},
"ignore": { "PQ": float, "SQ": float, "RQ": float, "IoU": float},
"car": { "PQ": float, "SQ": float, "RQ": float, "IoU": float},
...
@@ -186,7 +186,7 @@ def wrap_result_segmentation(self,
class_all_iou = class_all_iou.flatten().tolist()

results = dict()
results["all"] = dict(PQ=mean_pq, SQ=mean_sq, RQ=mean_rq, IoU=mean_iou)
results["all"] = dict(PQ=mean_pq, SQ=mean_sq, RQ=mean_rq, mIoU=mean_iou)
for idx, (pq, rq, sq, iou) in enumerate(zip(class_all_pq, class_all_rq, class_all_sq, class_all_iou)):
results[self.id2name[idx]] = dict(PQ=pq, SQ=sq, RQ=rq, IoU=iou)
thing_pq_list = [float(results[c]["PQ"]) for c in self.things]
@@ -200,8 +200,8 @@ def evaluate_tracking(self) -> Dict[str, Any]:
Calculate multi-object panoptic tracking metrics.
:return: A dict of panoptic metrics for mean of all classes and each class.
{
"all": { "PTQ": float, "sPTQ": float, "LSTQ": float, "IoU": float, "S_assoc": float,
"PTQ_dagger": float},
"all": { "PTQ": float, "sPTQ": float, "LSTQ": float, "mIoU": float, "S_assoc": float,
"PTQ_dagger": float, "MOTSA": float, sMOTSA: float, MOTSP: float},
"ignore": { "PTQ": float, "sPTQ": float, "IoU": float},
"car": { "PTQ": float, "sPTQ": float, "IoU": float},
...
@@ -243,20 +243,34 @@ def evaluate_tracking(self) -> Dict[str, Any]:
mean_ptq, class_all_ptq, mean_sptq, class_all_sptq = self.evaluator['tracking'].get_ptq()
mean_iou, class_all_iou = self.evaluator['tracking'].getSemIoU()
lstq, s_assoc = self.evaluator['tracking'].get_lstq()
mean_motsa, mean_s_motsa, mean_motsp = self.evaluator['tracking'].get_motsa()

results = self.wrap_result_mopt(mean_ptq=mean_ptq,
class_all_ptq=class_all_ptq,
mean_sptq=mean_sptq,
class_all_sptq=class_all_sptq,
mean_iou=mean_iou,
class_all_iou=class_all_iou,
lstq=lstq,
s_assoc=s_assoc,
mean_motsa=mean_motsa,
mean_s_motsa=mean_s_motsa,
mean_motsp=mean_motsp)

results = self.wrap_result_mopt(mean_ptq, mean_sptq, mean_iou, class_all_ptq, class_all_sptq, class_all_iou,
lstq, s_assoc)
return results

def wrap_result_mopt(self,
mean_ptq: np.ndarray,
mean_sptq: np.ndarray,
mean_iou: np.ndarray,
class_all_ptq: np.ndarray,
mean_sptq: np.ndarray,
class_all_sptq: np.ndarray,
mean_iou: np.ndarray,
class_all_iou: np.ndarray,
lstq: np.ndarray,
s_assoc: np.ndarray) -> Dict[str, Any]:
s_assoc: np.ndarray,
mean_motsa: np.ndarray,
mean_s_motsa: np.ndarray,
mean_motsp: np.ndarray) -> Dict[str, Any]:
"""
Wrap up MOPT results to dictionary.
:param mean_ptq: <float64: 1>, Mean PTQ score over all classes.
@@ -267,6 +281,9 @@ def wrap_result_mopt(self,
:param class_all_iou: <float64: num_classes,>, IoU scores for each class.
:param lstq: <float64: 1>, LiDAR Segmentation and Tracking Quality (LSTQ) score over all classes.
:param s_assoc: <float64: 1>, Association Score over all classes.
:param mean_motsa: <float64: 1>, Mean MOTSA score over all thing classes.
:param mean_s_motsa: <float64: 1>, Mean sMOTSA score over all thing classes.
:param mean_motsp: <float64: 1>, Mean MOTSP score over all thing classes.
:return: A dict of multi-object panoptic tracking metrics.
"""
mean_ptq, mean_sptq, mean_iou = mean_ptq.item(), mean_sptq.item(), mean_iou.item()
@@ -275,7 +292,8 @@ def wrap_result_mopt(self,
class_all_iou = class_all_iou.flatten().tolist()

results = dict()
results["all"] = dict(PTQ=mean_ptq, sPTQ=mean_sptq, LSTQ=lstq, IoU=mean_iou, S_assoc=s_assoc)
results["all"] = dict(PTQ=mean_ptq, sPTQ=mean_sptq, LSTQ=lstq, mIoU=mean_iou, S_assoc=s_assoc,
MOTSA=mean_motsa, sMOTSA=mean_s_motsa, MOTSP=mean_motsp)
for idx, (ptq, sptq, iou) in enumerate(zip(class_all_ptq, class_all_sptq, class_all_iou)):
results[self.id2name[idx]] = dict(PTQ=ptq, sPTQ=sptq, IoU=iou)
thing_ptq_list = [float(results[c]["PTQ"]) for c in self.things]
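For orientation, the sketch below shows how the enlarged tracking-results dictionary could be consumed after this change. It is illustrative only: `results` stands for the dict returned by `evaluate_tracking()` as documented in the docstring above, and `summarize_tracking` is a made-up helper, not part of the devkit.

```python
from typing import Any, Dict

def summarize_tracking(results: Dict[str, Any]) -> str:
    """Format the class-averaged tracking metrics, including the new MOTS-style keys."""
    overall = results["all"]
    keys = ("PTQ", "sPTQ", "LSTQ", "mIoU", "S_assoc", "PTQ_dagger", "MOTSA", "sMOTSA", "MOTSP")
    return ", ".join(f"{k}={overall[k]:.4f}" for k in keys if k in overall)
```

Per the docstring, only the "all" entry carries the MOTSA family; per-class entries keep the original PTQ/sPTQ/IoU triplet.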
47 changes: 45 additions & 2 deletions python-sdk/nuscenes/eval/panoptic/panoptic_track_evaluator.py
@@ -259,11 +259,54 @@ def get_ptq(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:

ptq_all = ((iou - ids) / tp_eps) * (tp / tp_half_fp_half_fn_eps) # Calculate PTQ of all classes.
soft_ptq_all = ((iou - soft_ids) / tp_eps) * (tp / tp_half_fp_half_fn_eps) # Calculate soft-PTQ of all classes.
mean_ptq = ptq_all[self.include].mean() # Mean PTQ over all classes except ignored classes.
mean_soft_ptq = soft_ptq_all[self.include].mean() # Mean soft-PTQ over all classes except ignored classes.

ground_truths = tp + fn
# Get classes that have at least 1 ground-truth instance (threshold 0.5) and are included in self.include.
valid_classes = ground_truths > 0.5
for i in range(valid_classes.shape[0]):
if i not in self.include:
valid_classes[i] = False

# Mean PTQ and sPTQ over all classes, excluding invalid ones (ignored classes or classes with zero ground truth).
mean_ptq = ptq_all[valid_classes].mean()
mean_soft_ptq = soft_ptq_all[valid_classes].mean()

return mean_ptq, ptq_all, mean_soft_ptq, soft_ptq_all
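As a side note (not part of the commit), the per-index loop above that builds `valid_classes` could also be written as a vectorized mask. The sketch below only restates the same logic; `ground_truths` and `include` mirror the names used in `get_ptq`, and the helper name is invented for illustration.

```python
import numpy as np
from typing import Sequence

def valid_class_mask(ground_truths: np.ndarray, include: Sequence[int]) -> np.ndarray:
    """Mask of classes with at least one ground-truth instance that are also in `include`."""
    has_gt = ground_truths > 0.5                                     # same 0.5 threshold as above
    in_include = np.isin(np.arange(ground_truths.shape[0]), include)
    return has_gt & in_include
```

For `get_motsa` below, where the arrays are sliced to thing classes starting at ID 1, the `arange` indices would need the corresponding offset before checking membership in `include`.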

def get_motsa(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Calculate MOTSA metrics.
:return: (mean_MOTSA, mean_sMOTSA, mean_MOTSP).
mean_MOTSA: <float64, 1>, mean MOTSA score over all thing classes.
mean_sMOTSA: <float64, 1>, mean soft-MOTSA score over all thing classes.
mean_MOTSP: <float64, 1>, mean MOTSP score over all thing classes.
"""
iou = self.pan_iou[1:self.min_stuff_cls_id].astype(np.double)
ids = self.pan_ids[1:self.min_stuff_cls_id].astype(np.double)

# Get tp, fp, and fn for all things: class 1:min_stuff_cls_id.
tp = self.pan_tp[1:self.min_stuff_cls_id].astype(np.double)
fp = self.pan_fp[1:self.min_stuff_cls_id].astype(np.double)
tp_eps, fn = np.maximum(tp, self.eps), self.pan_fn[1:self.min_stuff_cls_id].astype(np.double)

ground_truths = tp + fn
# Get classes that have at least 1 ground-truth instance (threshold 0.5) and are included in self.include.
valid_classes = ground_truths > 0.5
for i in range(valid_classes.shape[0]):
if i + 1 not in self.include:  # i + 1 since valid_classes covers class IDs 1:self.min_stuff_cls_id.
valid_classes[i] = False

# Calculate MOTSA of all valid thing classes.
motsa = (tp - fp - ids)[valid_classes] / (tp_eps + fn)[valid_classes]
# Calculate sMOTSA of all valid thing classes.
s_motsa = (iou - fp - ids)[valid_classes] / (tp_eps + fn)[valid_classes]
motsp = iou[valid_classes] / tp_eps[valid_classes]
mean_motsa = motsa.mean() # Mean MOTSA over all thing classes.
mean_s_motsa = s_motsa.mean() # Mean sMOTSA over all thing classes.
mean_motsp = motsp.mean()

return mean_motsa, mean_s_motsa, mean_motsp

def get_lstq(self) -> Tuple[np.ndarray, np.ndarray]:
"""
Calculate Lidar Segmentation and Tracking Quality (LSTQ) metric. https://arxiv.org/pdf/2102.12472.pdf
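For reference, the per-class formulas that `get_motsa` evaluates can be written as a small standalone function. This is an illustrative sketch derived from the accumulators used above (TP, FP, FN, ID switches, and summed IoU of matched segments); the function name and the `eps` default are assumptions, not devkit API.

```python
import numpy as np

def motsa_per_class(tp: np.ndarray, fp: np.ndarray, fn: np.ndarray,
                    ids: np.ndarray, matched_iou: np.ndarray,
                    eps: float = 1e-15):
    """Per-class MOTSA / sMOTSA / MOTSP from accumulated per-class counts.

    tp, fp, fn:   true positives, false positives, false negatives per thing class.
    ids:          identity switches per thing class.
    matched_iou:  summed IoU of the matched (true-positive) segments per thing class.
    """
    tp_eps = np.maximum(tp, eps)              # guard against division by zero
    gt = tp_eps + fn                          # number of ground-truth instances per class
    motsa = (tp - fp - ids) / gt              # detection and association accuracy
    s_motsa = (matched_iou - fp - ids) / gt   # soft variant: TP count replaced by matched IoU
    motsp = matched_iou / tp_eps              # mask precision of matched segments
    return motsa, s_motsa, motsp
```

The means reported by `get_motsa` then average these per-class values over the valid thing classes only.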
2 changes: 1 addition & 1 deletion setup/setup.py
@@ -39,7 +39,7 @@ def get_dirlist(_rootdir):

setuptools.setup(
name='nuscenes-devkit',
version='1.1.6',
version='1.1.7',
author='Holger Caesar, Oscar Beijbom, Qiang Xu, Varun Bankiti, Alex H. Lang, Sourabh Vora, Venice Erin Liong, '
'Sergi Widjaja, Kiwoo Shin, Caglayan Dicle, Freddy Boulton, Whye Kit Fong, Asha Asvathaman, Lubing Zhou '
'et al.',