Added PAT metric (nutonomy#659)
* added PAT metric for Panoptic nuScenes
mohan1914 authored Sep 18, 2021
1 parent cf2f128 commit 5058621
Showing 4 changed files with 158 additions and 11 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -21,6 +21,7 @@ Welcome to the devkit of the [nuScenes](https://www.nuscenes.org/nuscenes) and [
- [Citation](#citation)

## Changelog
- Sep. 17, 2021: Devkit v1.1.8: Add PAT metric to Panoptic nuScenes.
- Aug. 23, 2021: Devkit v1.1.7: Add more panoptic tracking metrics to nuScenes-panoptic code.
- Jul. 29, 2021: Devkit v1.1.6: nuScenes-panoptic v1.0 code, NeurIPS challenge announcement.
- Apr. 5, 2021: Devkit v1.1.3: Bug fixes and pip requirements.
29 changes: 22 additions & 7 deletions python-sdk/nuscenes/eval/panoptic/evaluate.py
@@ -40,8 +40,10 @@ class NuScenesPanopticEval:
nuScenes-panoptic uses the following metrics:
- Panoptic Segmentation: we use the PQ (Panoptic Quality) metric, which is defined as:
PQ = IOU/(TP + 0.5*FP + 0.5*FN).
- Multi-object Panoptic Tracking: we use the PTQ (Panoptic Tracking Quality) metric, which is defined as:
PTQ = (IOU - IDSs) / (TP + 0.5*FP + 0.5*FN).
- Multi-object Panoptic Tracking: we use the PAT (Panoptic Tracking) metric, which is defined as:
PAT = 2*PQ*TQ / (PQ + TQ) where TQ is as defined in the paper:
Panoptic nuScenes: A Large-Scale Benchmark for LiDAR Panoptic Segmentation and Tracking
(https://arxiv.org/pdf/2109.03805.pdf)
"""

def __init__(self,
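The PAT score combines segmentation and tracking quality as the harmonic mean of PQ and TQ. Below is a minimal sketch of that final combination, assuming PQ and TQ have already been aggregated; the helper name combine_pat and the example values are illustrative, not devkit API.

```python
def combine_pat(pq: float, tq: float, eps: float = 1e-15) -> float:
    """Harmonic mean of Panoptic Quality and Tracking Quality: PAT = 2*PQ*TQ / (PQ + TQ)."""
    return 2.0 * pq * tq / max(pq + tq, eps)

# e.g. PQ = 0.75 and TQ = 0.60 give PAT = 0.9 / 1.35 ≈ 0.667.
print(combine_pat(0.75, 0.60))
```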
@@ -200,8 +202,9 @@ def evaluate_tracking(self) -> Dict[str, Any]:
Calculate multi-object panoptic tracking metrics.
:return: A dict of panoptic metrics for mean of all classes and each class.
{
"all": { "PTQ": float, "sPTQ": float, "LSTQ": float, "mIoU": float, "S_assoc": float,
"PTQ_dagger": float, "MOTSA": float, sMOTSA: float, MOTSP: float},
"all": { "PAT": float, "PQ": float, "TQ": float, PTQ": float, "sPTQ": float, "LSTQ": float,
"mIoU": float, "S_assoc": float, "PTQ_dagger": float, "MOTSA": float, "sMOTSA": float,
"MOTSP": float},
"ignore": { "PTQ": float, "sPTQ": float, "IoU": float},
"car": { "PTQ": float, "sPTQ": float, "IoU": float},
...
@@ -240,12 +243,16 @@ def evaluate_tracking(self) -> Dict[str, Any]:
break
cur_token = cur_sample['next']

pat, mean_pq, mean_tq = self.evaluator['tracking'].get_pat()
mean_ptq, class_all_ptq, mean_sptq, class_all_sptq = self.evaluator['tracking'].get_ptq()
mean_iou, class_all_iou = self.evaluator['tracking'].getSemIoU()
lstq, s_assoc = self.evaluator['tracking'].get_lstq()
mean_motsa, mean_s_motsa, mean_motsp = self.evaluator['tracking'].get_motsa()

results = self.wrap_result_mopt(mean_ptq=mean_ptq,
results = self.wrap_result_mopt(pat=pat,
mean_pq=mean_pq,
mean_tq=mean_tq,
mean_ptq=mean_ptq,
class_all_ptq=class_all_ptq,
mean_sptq=mean_sptq,
class_all_sptq=class_all_sptq,
@@ -260,6 +267,9 @@ def evaluate_tracking(self) -> Dict[str, Any]:
return results

def wrap_result_mopt(self,
pat: np.ndarray,
mean_pq: np.ndarray,
mean_tq: np.ndarray,
mean_ptq: np.ndarray,
class_all_ptq: np.ndarray,
mean_sptq: np.ndarray,
@@ -273,6 +283,9 @@ def wrap_result_mopt(self,
mean_motsp: np.ndarray) -> Dict[str, Any]:
"""
Wrap up MOPT results to dictionary.
:param pat: <float64: 1>, Panoptic Tracking (PAT) score over all classes.
:param mean_pq: <float64: 1>, Mean Panoptic Quality over all classes.
:param mean_tq: <float64: 1>, Mean Tracking Quality over all temporally unique instances.
:param mean_ptq: <float64: 1>, Mean PTQ score over all classes.
:param mean_sptq: <float64: 1>, Mean soft-PTQ score over all classes.
:param mean_iou: <float64: 1>, Mean IoU score over all classes.
@@ -286,14 +299,16 @@ def wrap_result_mopt(self,
:param mean_motsp: <float64: 1>, Mean MOTSP score over all thing classes.
:return: A dict of multi-object panoptic tracking metrics.
"""
pat, mean_pq, mean_tq = pat.item(), mean_pq.item(), mean_tq.item()
mean_ptq, mean_sptq, mean_iou = mean_ptq.item(), mean_sptq.item(), mean_iou.item()
class_all_ptq = class_all_ptq.flatten().tolist()
class_all_sptq = class_all_sptq.flatten().tolist()
class_all_iou = class_all_iou.flatten().tolist()

results = dict()
results["all"] = dict(PTQ=mean_ptq, sPTQ=mean_sptq, LSTQ=lstq, mIoU=mean_iou, S_assoc=s_assoc,
MOTSA=mean_motsa, sMOTSA=mean_s_motsa, MOTSP=mean_motsp)
results["all"] = dict(PAT=pat, PQ=mean_pq, TQ=mean_tq, PTQ=mean_ptq, sPTQ=mean_sptq,
LSTQ=lstq, mIoU=mean_iou, S_assoc=s_assoc, MOTSA=mean_motsa,
sMOTSA=mean_s_motsa, MOTSP=mean_motsp)
for idx, (ptq, sptq, iou) in enumerate(zip(class_all_ptq, class_all_sptq, class_all_iou)):
results[self.id2name[idx]] = dict(PTQ=ptq, sPTQ=sptq, IoU=iou)
thing_ptq_list = [float(results[c]["PTQ"]) for c in self.things]
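A hedged usage sketch for consuming the dictionary returned by evaluate_tracking(); the helper below is illustrative only and relies solely on the keys documented above ("all" plus one entry per class name).

```python
def summarize_tracking_results(results: dict) -> None:
    """Print overall PAT/PQ/TQ/PTQ plus per-class PTQ, sPTQ and IoU from evaluate_tracking() output."""
    overall = results["all"]
    print(f"PAT {overall['PAT']:.4f} | PQ {overall['PQ']:.4f} | TQ {overall['TQ']:.4f} | "
          f"PTQ {overall['PTQ']:.4f} | LSTQ {overall['LSTQ']:.4f}")
    for name, scores in results.items():
        if name == "all":
            continue  # the remaining entries are per-class dicts
        print(f"{name:>20s}: PTQ {scores['PTQ']:.4f}, sPTQ {scores['sPTQ']:.4f}, IoU {scores['IoU']:.4f}")
```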
137 changes: 134 additions & 3 deletions python-sdk/nuscenes/eval/panoptic/panoptic_track_evaluator.py
@@ -52,6 +52,10 @@ def __init__(self,
self.intersects = {}
self.intersects_ovr = {}

# PAT Tracking stuff.
self.instance_preds = {}
self.instance_gts = {}

# Per-class association quality stuff.
self.pan_aq = np.zeros(self.n_classes, dtype=np.double)
self.pan_aq_ovr = 0.0
@@ -76,8 +80,7 @@ def get_panoptic_track_stats(self,
x_inst_row: np.ndarray = None,
scene: str = None,
cl: int = None)\
-> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[int, int],
Dict[int, int], np.ndarray]:
-> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[int, int], Dict[int, int], np.ndarray]:
"""
Calculate class-specific panoptic tracking stats given predicted instances and target instances.
:param x_inst_in_cl: <np.int64: num_points>, instance IDs of each point for predicted instances.
@@ -129,6 +132,48 @@ def get_panoptic_track_stats(self,
unique_combo_, counts_combo_ = np.unique(offset_combo_, return_counts=True)
self.update_dict_stat(cl_intersects, unique_combo_, counts_combo_)

# Computation for PAT score
# Computes the unique gt instances and their numbers of points (> self.min_points)
unique_gt_, counts_gt_ = np.unique(y_inst_in_cl[y_inst_in_cl > 0], return_counts=True)
id2idx_gt_ = {inst_id: idx for idx, inst_id in enumerate(unique_gt_)}
# Computes the unique pred instances (class-agnostic) and their numbers of points
unique_pred_, counts_pred_ = np.unique(x_inst_row[x_inst_row > 0], return_counts=True)
id2idx_pred_ = {inst_id: idx for idx, inst_id in enumerate(unique_pred_)}
# Actually unique_combo_ = pred_labels_ + self.offset * gt_labels_
gt_labels_ = unique_combo_ // self.offset
pred_labels_ = unique_combo_ % self.offset
gt_areas_ = np.array([counts_gt_[id2idx_gt_[g_id]] for g_id in gt_labels_])
pred_areas_ = np.array([counts_pred_[id2idx_pred_[p_id]] for p_id in pred_labels_])
# Here counts_combo_ : TP (point-level)
intersections_ = counts_combo_
# Here gt_areas_ : TP + FN, pred_areas_ : TP + FP (point-level)
# Overall unions_ : TP + FP + FN (point-level)
unions_ = gt_areas_ + pred_areas_ - intersections_
# IoU : TP / (TP + FP + FN)
ious_agnostic = intersections_.astype(np.float32) / unions_.astype(np.float32)
# tp_indexes_agnostic : TP (instance-level, IoU > 0.5)
tp_indexes_agnostic = ious_agnostic > 0.5
matched_gt_ = np.array([False] * len(id2idx_gt_))
matched_gt_[[id2idx_gt_[g_id] for g_id in gt_labels_[tp_indexes_agnostic]]] = True

# Stores matched tracks (the corresponding class-agnostic predicted instance) for the unique gt instances:
for idx, value in enumerate(tp_indexes_agnostic):
if value:
g_label = gt_labels_[idx]
p_label = pred_labels_[idx]
if g_label not in self.instance_gts[scene][cl]:
self.instance_gts[scene][cl][g_label] = [p_label,]
else:
self.instance_gts[scene][cl][g_label].append(p_label)

# Stores unmatched tracks for the unique gt instances: assigns 1 for no match
for g_label in unique_gt_:
if not matched_gt_[id2idx_gt_[g_label]]:
if g_label not in self.instance_gts[scene][cl]:
self.instance_gts[scene][cl][g_label] = [1,]
else:
self.instance_gts[scene][cl][g_label].append(1)

# Generate an intersection map, count the intersections with over 0.5 IoU as TP.
gt_labels = unique_combo // self.offset
pred_labels = unique_combo % self.offset
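The class-agnostic matching above uses the same offset-pair trick as the per-class stats: each point's (pred, gt) id pair is encoded as a single integer so that one np.unique call yields every pairwise intersection size. Below is a self-contained sketch of this technique under the file's conventions (id 0 means unlabeled, pairs with IoU > 0.5 count as matches); the helper name match_instances and the default offset value are assumptions.

```python
import numpy as np

def match_instances(pred_inst: np.ndarray, gt_inst: np.ndarray, offset: int = 2 ** 32):
    """Return (gt_id, pred_id, iou) for every pair with IoU > 0.5, using offset-pair encoding."""
    # Per-instance point counts (areas); id 0 is treated as unlabeled and ignored.
    pred_ids, pred_counts = np.unique(pred_inst[pred_inst > 0], return_counts=True)
    gt_ids, gt_counts = np.unique(gt_inst[gt_inst > 0], return_counts=True)
    pred_area = dict(zip(pred_ids.tolist(), pred_counts.tolist()))
    gt_area = dict(zip(gt_ids.tolist(), gt_counts.tolist()))

    # Encode each point where both ids are valid as pred + offset * gt, then count the pairs.
    valid = (pred_inst > 0) & (gt_inst > 0)
    combo = pred_inst[valid].astype(np.int64) + offset * gt_inst[valid].astype(np.int64)
    pairs, intersections = np.unique(combo, return_counts=True)

    matches = []
    for pair, tp in zip(pairs.tolist(), intersections.tolist()):
        g, p = pair // offset, pair % offset
        iou = tp / (gt_area[g] + pred_area[p] - tp)  # TP / (TP + FP + FN) at point level
        if iou > 0.5:
            matches.append((g, p, iou))
    return matches

# Toy frame: pred instance 2 overlaps gt instance 1 on 3 points; pred has 3 points, gt has 4 -> IoU = 3/4.
pred = np.array([2, 2, 2, 0, 0, 3])
gt = np.array([1, 1, 1, 1, 0, 0])
print(match_instances(pred, gt))  # [(1, 2, 0.75)]
```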
@@ -160,7 +205,9 @@ def add_batch_panoptic(self,
self.gts[scene] = [{} for _ in range(self.n_classes)]
self.intersects[scene] = [{} for _ in range(self.n_classes)]
self.intersects_ovr[scene] = [{} for _ in range(self.n_classes)]
# Make sure instance IDs are non-zeros. Otherwise, they will be ignored. Note in nuScenes-panoptic,
self.instance_preds[scene] = {}
self.instance_gts[scene] = [{} for _ in range(self.n_classes)]
# Make sure instance IDs are non-zeros. Otherwise, they will be ignored. Note in Panoptic nuScenes,
# instance IDs start from 1 already, so the following 2 lines of code are not strictly necessary. We keep them to
# stay consistent with the third-party PanopticEval class in panoptic_seg_evaluator.py, which means the actual
# instance IDs will start from 2 during metric evaluation.
@@ -186,6 +233,14 @@ def add_batch_panoptic(self,
x_inst_row[0] = x_inst_row[0][gt_not_in_excl_mask]
y_inst_row[0] = y_inst_row[0][gt_not_in_excl_mask]

# Accumulate class-agnostic predictions
unique_pred_, counts_pred_ = np.unique(x_inst_row[1][x_inst_row[1] > 0], return_counts=True)
for p_id in unique_pred_[counts_pred_ > self.min_points]:
if p_id not in self.instance_preds[scene]:
self.instance_preds[scene][p_id] = 1
else:
self.instance_preds[scene][p_id] += 1

# First step is to count intersections > 0.5 IoU for each class (except the ignored ones).
for cl in self.include:
# Previous Frame.
@@ -336,6 +391,82 @@ def get_lstq(self) -> Tuple[np.ndarray, np.ndarray]:
lstq = np.sqrt(s_assoc * s_cls)
return lstq, s_assoc

def get_pat(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Calculate Panoptic Tracking (PAT) metric. https://arxiv.org/pdf/2109.03805.pdf
:return: (PAT, mean_PQ, mean_TQ).
PAT: <float64: 1>, PAT score over all classes.
mean_PQ: <float64: 1>, mean PQ score over all classes.
mean_TQ: <float64: 1>, mean TQ score over all temporally unique gt instances.
"""
# First calculate for all classes
sq_all = self.pan_iou.astype(np.double) / np.maximum(self.pan_tp.astype(np.double), self.eps)
rq_all = self.pan_tp.astype(np.double) / np.maximum(
self.pan_tp.astype(np.double) + 0.5 * self.pan_fp.astype(np.double) + 0.5 * self.pan_fn.astype(np.double),
self.eps)
pq_all = sq_all * rq_all

# Then do the REAL mean (no ignored classes)
pq = pq_all[self.include].mean()

accumulate_tq = 0.0
accumulate_norm = 0

for seq in self.sequences:
preds = self.instance_preds[seq]
for cl in self.include:
cls_gts = self.instance_gts[seq][cl]
for gt_id, pr_ids in cls_gts.items():
unique_pr_id, counts_pr_id = np.unique(pr_ids, return_counts=True)

track_length = len(pr_ids)
# Void/stuff points have instance value 1 (due to the +1 in ln205), and unmatched gt frames are also denoted by 1.
# Thus we remove 1 from the prediction id list.
unique_pr_id, counts_pr_id = unique_pr_id[unique_pr_id != 1], counts_pr_id[unique_pr_id != 1]
fp_pr_id = []

# Computes the total false positive for each prediction id:
# preds[uid]: TPA + FPA (class-agnostic)
# counts_pr_id[idx]: TPA (class-agnostic)
# If a prediction id is not in preds, it has fewer than self.min_points points.
# As in the PQ computation, a prediction with fewer than self.min_points points but an IoU overlap
# greater than 0.5 with the gt is counted as TPA but not as FPA (the else branch).
for idx, uid in enumerate(unique_pr_id):
if uid in preds:
fp_pr_id.append(preds[uid] - counts_pr_id[idx])
else:
fp_pr_id.append(0)

fp_pr_id = np.array(fp_pr_id)
# AQ component of TQ where counts_pr_id = TPA, track_length = TPA + FNA, fp_pr_id = FPA.
gt_id_aq = np.sum(counts_pr_id ** 2 / np.double(track_length + fp_pr_id)) / np.double(track_length)
# Assigns ID switch component of TQ as 1.0 if the gt instance occurs only once.
gt_id_is = 1.0

if track_length > 1:
# Compute the ID switch component
s_id = -1
ids = 0
# Total possible id switches
total_ids = track_length - 1
# Gt tracks with no corresponding prediction match are assigned 1.
# An id switch is counted if the previous predicted id and the current one do not match
# for the given gt track, or if there is no matching prediction for the given gt track.
for pr_id in pr_ids:
if s_id != -1:
if pr_id != s_id or s_id == 1:
ids += 1
s_id = pr_id
gt_id_is = 1 - (ids / np.double(total_ids))
# Accumulate TQ over all the possible unique gt instances
accumulate_tq += np.sqrt(gt_id_aq * gt_id_is)
# Count the total number of unique gt instances
accumulate_norm += 1
# Normalization
tq = np.array(accumulate_tq/accumulate_norm)
pat = (2 * pq * tq) / (pq + tq)
return pat, pq, tq
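A standalone sketch of the per-track TQ term computed above, applied to a toy track; track_quality, the preds mapping, and the example pq value are illustrative rather than devkit API, but the AQ and ID-switch formulas mirror get_pat().

```python
import numpy as np

def track_quality(pr_ids, preds):
    """TQ of one gt track: sqrt(association quality * id-switch score).

    pr_ids: per-frame matched class-agnostic prediction id for this gt track (1 = no match).
    preds:  total number of frames in which each prediction id appears (TPA + FPA).
    """
    track_length = len(pr_ids)                                # TPA + FNA for this gt track
    uid, tpa = np.unique(pr_ids, return_counts=True)
    uid, tpa = uid[uid != 1], tpa[uid != 1]                   # drop the "no match" marker
    fpa = np.array([preds.get(int(u), int(t)) - int(t) for u, t in zip(uid, tpa)])
    aq = np.sum(tpa ** 2 / (track_length + fpa).astype(np.double)) / float(track_length)

    switches, prev = 0, -1
    for p in pr_ids:                                          # count id switches along the track
        if prev != -1 and (p != prev or prev == 1):
            switches += 1
        prev = p
    id_score = 1.0 if track_length <= 1 else 1.0 - switches / (track_length - 1)
    return np.sqrt(aq * id_score)

# A 5-frame gt track: matched to pred 7 three times, unmatched once, then pred 9.
# Pred 7 appears in 4 frames overall (one extra false positive), pred 9 in 1 frame.
tq = track_quality([7, 7, 7, 1, 9], preds={7: 4, 9: 1})       # AQ = 0.34, IS = 0.5
pq = 0.7                                                      # assume a PQ computed elsewhere
pat = 2 * pq * tq / (pq + tq)                                 # harmonic mean, as in get_pat()
```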

def add_batch(self, scene: str, x_sem: List[np.ndarray], x_inst: List[np.ndarray], y_sem: List[np.ndarray],
y_inst: List[np.ndarray]) -> None:
"""
2 changes: 1 addition & 1 deletion setup/setup.py
@@ -39,7 +39,7 @@ def get_dirlist(_rootdir):

setuptools.setup(
name='nuscenes-devkit',
version='1.1.7',
version='1.1.8',
author='Holger Caesar, Oscar Beijbom, Qiang Xu, Varun Bankiti, Alex H. Lang, Sourabh Vora, Venice Erin Liong, '
'Sergi Widjaja, Kiwoo Shin, Caglayan Dicle, Freddy Boulton, Whye Kit Fong, Asha Asvathaman, Lubing Zhou '
'et al.',
