tensorflow · tf-models-copybara-bot · Sep 15, 2020 · Jul 24, 2020 · Jul 24, 2020 · Jul 24, 2020
diff --git a/research/object_detection/core/region_similarity_calculator.py b/research/object_detection/core/region_similarity_calculator.py
@@ -78,6 +78,37 @@ def _compare(self, boxlist1, boxlist2):
     """
     return box_list_ops.iou(boxlist1, boxlist2)
 
+class DETRSimilarity(RegionSimilarityCalculator):
+  """Class to compute similarity for the Detection Transformer model.
+
+  This class computes pairwise similarity between two BoxLists using a weighted
+  combination of IOU, classification scores, and the L1 loss.
+  """
+  def __init__(self, l1_weight=5, giou_weight=2):
+    self.l1_weight = l1_weight
+    self.giou_weight = giou_weight
+
+  def _compare(self, boxlist1, boxlist2):
+    """Compute pairwise IOU similarity between the two BoxLists.
+
+    Args:
+      boxlist1: BoxList holding N groundtruth boxes.
+      boxlist2: BoxList holding M predicted boxes.
+
+    Returns:
+      A tensor with shape [N, M] representing pairwise iou scores.
+    """
+    groundtruth_labels = boxlist1.get_field(fields.BoxListFields.classes)
+    predicted_labels = boxlist2.get_field(fields.BoxListFields.classes)
+    # Currently supporting only softmax
+    classification_scores = tf.matmul(groundtruth_labels,
+                                      predicted_labels,
+                                      transpose_b=True)
+    loss = self.l1_weight * box_list_ops.l1(
+        boxlist1, boxlist2) + self.giou_weight * (1 - box_list_ops.giou(
+            boxlist1, boxlist2)) - classification_scores
+    return -loss
+
 
 class NegSqDistSimilarity(RegionSimilarityCalculator):
   """Class to compute similarity based on the squared distance metric.

diff --git a/research/object_detection/core/region_similarity_calculator_test.py b/research/object_detection/core/region_similarity_calculator_test.py
@@ -93,6 +93,24 @@ def graph_fn():
     iou_output = self.execute(graph_fn, [])
     self.assertAllClose(iou_output, exp_output)
 
+  def test_detr_similarity(self):
+    def graph_fn():
+      corners1 = tf.constant([[5.0, 7.0, 7.0, 9.0]])
+      corners2 = tf.constant([[5.0, 7.0, 7.0, 9.0], [5.0, 11.0, 7.0, 13.0]])
+      groundtruth_labels = tf.constant([[1.0, 0.0]])
+      predicted_labels = tf.constant([[0.0, 1000.0], [1000.0, 0.0]])
+      boxes1 = box_list.BoxList(corners1)
+      boxes2 = box_list.BoxList(corners2)
+      boxes1.add_field(fields.BoxListFields.classes, groundtruth_labels)
+      boxes2.add_field(fields.BoxListFields.classes, predicted_labels)
+      detr_similarity_calculator = \
+          region_similarity_calculator.DETRSimilarity()
+      detr_similarity = detr_similarity_calculator.compare(
+          boxes1, boxes2, None)
+      return detr_similarity
+    exp_output = [[2.0, -2.0/3.0 + 1.0 - 20.0]]
+    sim_output = self.execute(graph_fn, [])
+    self.assertAllClose(sim_output, exp_output)
 
 if __name__ == '__main__':
   tf.test.main()
diff --git a/research/object_detection/core/target_assigner.py b/research/object_detection/core/target_assigner.py
@@ -51,6 +51,7 @@
 from object_detection.core import region_similarity_calculator as sim_calc
 from object_detection.core import standard_fields as fields
 from object_detection.matchers import argmax_matcher
+from object_detection.matchers import hungarian_matcher
 from object_detection.utils import shape_utils
 from object_detection.utils import target_assigner_utils as ta_utils
 from object_detection.utils import tf_version
@@ -510,7 +511,8 @@ def batch_assign(target_assigner,
       anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
     (cls_targets, cls_weights,
      reg_targets, reg_weights, match) = target_assigner.assign(
-         anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights)
+         anchors, gt_boxes, gt_class_targets, unmatched_class_label,
+         gt_weights)
     cls_targets_list.append(cls_targets)
     cls_weights_list.append(cls_weights)
     reg_targets_list.append(reg_targets)
@@ -1897,3 +1899,178 @@ def assign_corner_offset_targets(
 
     return (tf.stack(corner_targets, axis=0),
             tf.stack(foreground_targets, axis=0))
+
+class DETRTargetAssigner(object):
+  """Target assigner to compute classification and regression targets."""
+
+  def __init__(self):
+    """Construct Object Detection Target Assigner.
+
+    Args:
+      matcher: an object_detection.core.Matcher used to match groundtruth to
+        predicted boxes.
+      box_coder_instance: an object_detection.core.BoxCoder used to encode
+        matching groundtruth boxes with respect to predicted boxes.
+
+    """
+    self._similarity_calc = sim_calc.DETRSimilarity()
+    self._matcher = hungarian_matcher.HungarianBipartiteMatcher()
+
+  def batch_assign(self,
+                   pred_box_batch,
+                   gt_box_batch,
+                   pred_class_batch,
+                   gt_class_targets_batch,
+                   gt_weights_batch=None):
+
+    """Batched assignment of classification and regression targets.
+
+    Args:
+      pred_box_batch: a tensor of shape [batch_size, num_queries, 4]
+        representing predicted bounding boxes.
+      gt_box_batch: a tensor of shape [batch_size, num_queries, 4]
+        representing groundtruth bounding boxes.
+      pred_class_batch: A list of tensors with length batch_size, where each
+        each tensor has shape [num_queries, num_classes] to be used
+        by certain similarity calculators.
+      gt_class_targets_batch: a list of tensors with length batch_size, where
+        each tensor has shape [num_gt_boxes_i, num_classes] and
+        num_gt_boxes_i is the number of boxes in the ith boxlist of
+        gt_box_batch.
+      gt_weights_batch: A list of 1-D tf.float32 tensors of shape
+        [num_boxes] containing weights for groundtruth boxes.
+
+    Returns:
+      batch_cls_targets: a tensor with shape [batch_size, num_pred_boxes,
+        num_classes],
+      batch_cls_weights: a tensor with shape [batch_size, num_pred_boxes,
+        num_classes],
+      batch_reg_targets: a tensor with shape [batch_size, num_pred_boxes,
+        box_code_dimension]
+      batch_reg_weights: a tensor with shape [batch_size, num_pred_boxes].
+    """
+    pred_box_batch = [
+        box_list.BoxList(pred_box)
+        for pred_box in tf.unstack(pred_box_batch)]
+    gt_box_batch = [
+        box_list.BoxList(gt_box)
+        for gt_box in tf.unstack(gt_box_batch)]
+
+    cls_targets_list = []
+    cls_weights_list = []
+    reg_targets_list = []
+    reg_weights_list = []
+    if gt_weights_batch is None:
+      gt_weights_batch = [None] * len(gt_class_targets_batch)
+    pred_class_batch = tf.unstack(pred_class_batch)
+    for (pred_boxes, gt_boxes, pred_class_batch,
+         gt_class_targets, gt_weights) in zip(
+            pred_box_batch, gt_box_batch, pred_class_batch,
+            gt_class_targets_batch, gt_weights_batch):
+      (cls_targets, cls_weights,
+       reg_targets, reg_weights) = self.assign(
+          pred_boxes, gt_boxes, pred_class_batch, gt_class_targets, gt_weights)
+      cls_targets_list.append(cls_targets)
+      cls_weights_list.append(cls_weights)
+      reg_targets_list.append(reg_targets)
+      reg_weights_list.append(reg_weights)
+    batch_cls_targets = tf.stack(cls_targets_list)
+    batch_cls_weights = tf.stack(cls_weights_list)
+    batch_reg_targets = tf.stack(reg_targets_list)
+    batch_reg_weights = tf.stack(reg_weights_list)
+    return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
+            batch_reg_weights)
+
+  def assign(self,
+             pred_boxes,
+             gt_boxes,
+             pred_classes,
+             gt_labels,
+             gt_weights=None):
+    """Assign classification and regression targets to each box_pred.
+
+    For a given set of pred_boxes and groundtruth detections, match pred_boxes
+    to gt_boxes and assign classification and regression targets to
+    each box_pred as well as weights based on the resulting match (specifying,
+    e.g., which pred_boxes should not contribute to training loss).
+
+    pred_boxes that are not matched to anything are given a classification target
+    of self._unmatched_cls_target which can be specified via the constructor.
+
+    Args:
+      pred_boxes: a BoxList representing N pred_boxes
+      gt_boxes: a BoxList representing M groundtruth boxes
+      pred_classes: A tensor with shape [max_num_boxes, num_classes]
+        to be used by certain similarity calculators.
+      gt_labels:  a tensor of shape [M, num_classes]
+        with labels for each of the ground_truth boxes. The subshape
+        [num_classes] can be empty (corresponding to scalar inputs).  When set
+        to None, gt_labels assumes a binary problem where all
+        ground_truth boxes get a positive label (of 1).
+      gt_weights: a float tensor of shape [M] indicating the weight to
+        assign to all pred_boxes match to a particular groundtruth box. The
+        weights must be in [0., 1.]. If None, all weights are set to 1.
+        Generally no groundtruth boxes with zero weight match to any pred_boxes
+        as matchers are aware of groundtruth weights. Additionally,
+        `cls_weights` and `reg_weights` are calculated using groundtruth
+        weights as an added safety.
+
+    Returns:
+      cls_targets: a float32 tensor with shape [num_pred_boxes, num_classes],
+        where the subshape [num_classes] is compatible with gt_labels
+        which has shape [num_gt_boxes, num_classes].
+      cls_weights: a float32 tensor with shape [num_pred_boxes, num_classes],
+        representing weights for each element in cls_targets.
+      reg_targets: a float32 tensor with shape [num_pred_boxes,
+        box_code_dimension]
+      reg_weights: a float32 tensor with shape [num_pred_boxes]
+
+    """
+    unmatched_class_label = tf.constant(
+        [1] + [0] * (gt_labels.shape[1] - 1), tf.float32)
+
+    if gt_weights is None:
+      num_gt_boxes = gt_boxes.num_boxes_static()
+      if not num_gt_boxes:
+        num_gt_boxes = gt_boxes.num_boxes()
+      gt_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
+
+    gt_boxes.add_field(fields.BoxListFields.classes,
+                                gt_labels)
+    pred_boxes.add_field(fields.BoxListFields.classes, pred_classes)
+
+    match_quality_matrix = self._similarity_calc.compare(
+        gt_boxes,
+        pred_boxes)
+    match = self._matcher.match(match_quality_matrix,
+                                valid_rows=tf.greater(gt_weights, 0))
+
+    matched_gt_boxes = match.gather_based_on_match(
+        gt_boxes.get(),
+        unmatched_value=tf.zeros(4),
+        ignored_value=tf.zeros(4))
+    matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
+    ty, tx, th, tw = matched_gt_boxlist.get_center_coordinates_and_sizes()
+    reg_targets = tf.transpose(tf.stack([ty, tx, th, tw]))
+    cls_targets = match.gather_based_on_match(
+        gt_labels,
+        unmatched_value=unmatched_class_label,
+        ignored_value=unmatched_class_label)
+    reg_weights = match.gather_based_on_match(
+        gt_weights,
+        ignored_value=0.,
+        unmatched_value=0.)
+    cls_weights = match.gather_based_on_match(
+        gt_weights,
+        ignored_value=0.,
+        unmatched_value=1)
+
+    # convert cls_weights from per-box_pred to per-class.
+    class_label_shape = tf.shape(cls_targets)[1:]
+    weights_multiple = tf.concat(
+        [tf.constant([1]), class_label_shape],
+        axis=0)
+    cls_weights = tf.expand_dims(cls_weights, -1)
+    cls_weights = tf.tile(cls_weights, weights_multiple)
+
+    return (cls_targets, cls_weights, reg_targets, reg_weights)
diff --git a/research/object_detection/core/target_assigner_test.py b/research/object_detection/core/target_assigner_test.py
@@ -24,6 +24,7 @@
 from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner as targetassigner
 from object_detection.matchers import argmax_matcher
+from object_detection.matchers import hungarian_matcher
 from object_detection.utils import np_box_ops
 from object_detection.utils import test_case
 from object_detection.utils import tf_version
@@ -114,6 +115,7 @@ def graph_fn(anchor_means, groundtruth_box_corners):
     self.assertEqual(reg_targets_out.dtype, np.float32)
     self.assertEqual(reg_weights_out.dtype, np.float32)
 
+
   def test_assign_agnostic_with_keypoints(self):
     def graph_fn(anchor_means, groundtruth_box_corners,
                  groundtruth_keypoints):
@@ -2190,6 +2192,92 @@ def graph_fn():
     self.assertAllClose(foreground, np.zeros((1, 5, 5)))
 
 
+class DETRTargetAssignerTest(test_case.TestCase):
+  def test_assign_detr(self):
+    def graph_fn(pred_corners, groundtruth_box_corners,
+                 groundtruth_labels, predicted_labels):
+      detr_target_assigner = targetassigner.DETRTargetAssigner()
+      pred_boxlist = box_list.BoxList(pred_corners)
+      groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+      result = detr_target_assigner.assign(
+          pred_boxlist, groundtruth_boxlist,
+          predicted_labels, groundtruth_labels)
+      (cls_targets, cls_weights, reg_targets, reg_weights) = result
+      return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+    pred_corners = np.array([[0.25, 0.25, 0.4, 0.2],
+                             [0.5, 0.8, 1.0, 0.8],
+                             [0.9, 0.5, 0.1, 1.0]], dtype=np.float32)
+    groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+                                        [0.5, 0.5, 0.9, 0.9]],
+                                       dtype=np.float32)
+    predicted_labels = np.array([[-3.0, 3.0], [2.0, 9.4], [5.0, 1.0]],
+                                dtype=np.float32)
+    groundtruth_labels = np.array([[0.0, 1.0], [0.0, 1.0]],
+                                  dtype=np.float32)
+
+    exp_cls_targets = [[0, 1], [0, 1], [1, 0]]
+    exp_cls_weights = [[1, 1], [1, 1], [1, 1]]
+    exp_reg_targets = [[0.25, 0.25, 0.5, 0.5],
+                       [0.7, 0.7, 0.4, 0.4],
+                       [0, 0, 0, 0]]
+    exp_reg_weights = [1, 1, 0]
+
+    (cls_targets_out,
+     cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+         graph_fn, [pred_corners, groundtruth_box_corners,
+                    groundtruth_labels, predicted_labels])
+
+    self.assertAllClose(cls_targets_out, exp_cls_targets)
+    self.assertAllClose(cls_weights_out, exp_cls_weights)
+    self.assertAllClose(reg_targets_out, exp_reg_targets)
+    self.assertAllClose(reg_weights_out, exp_reg_weights)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)
+
+  def test_batch_assign_detr(self):
+    def graph_fn(pred_corners, groundtruth_box_corners,
+                 groundtruth_labels, predicted_labels):
+      detr_target_assigner = targetassigner.DETRTargetAssigner()
+      result = detr_target_assigner.batch_assign(
+          pred_corners, groundtruth_box_corners,
+          [predicted_labels], [groundtruth_labels])
+      (cls_targets, cls_weights, reg_targets, reg_weights) = result
+      return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+    pred_corners = np.array([[[0.25, 0.25, 0.4, 0.2],
+                              [0.5, 0.8, 1.0, 0.8],
+                              [0.9, 0.5, 0.1, 1.0]]], dtype=np.float32)
+    groundtruth_box_corners = np.array([[[0.0, 0.0, 0.5, 0.5],
+                                         [0.5, 0.5, 0.9, 0.9]]],
+                                       dtype=np.float32)
+    predicted_labels = np.array([[-3.0, 3.0], [2.0, 9.4], [5.0, 1.0]],
+                                dtype=np.float32)
+    groundtruth_labels = np.array([[0.0, 1.0], [0.0, 1.0]],
+                                  dtype=np.float32)
+
+    exp_cls_targets = [[[0, 1], [0, 1], [1, 0]]]
+    exp_cls_weights = [[[1, 1], [1, 1], [1, 1]]]
+    exp_reg_targets = [[[0.25, 0.25, 0.5, 0.5],
+                        [0.7, 0.7, 0.4, 0.4],
+                        [0, 0, 0, 0]]]
+    exp_reg_weights = [[1, 1, 0]]
+
+    (cls_targets_out,
+     cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+         graph_fn, [pred_corners, groundtruth_box_corners,
+                    groundtruth_labels, predicted_labels])
+
+    self.assertAllClose(cls_targets_out, exp_cls_targets)
+    self.assertAllClose(cls_weights_out, exp_cls_weights)
+    self.assertAllClose(reg_targets_out, exp_reg_targets)
+    self.assertAllClose(reg_weights_out, exp_reg_weights)
+    self.assertEqual(cls_targets_out.dtype, np.float32)
+    self.assertEqual(cls_weights_out.dtype, np.float32)
+    self.assertEqual(reg_targets_out.dtype, np.float32)
+    self.assertEqual(reg_weights_out.dtype, np.float32)
+
 if __name__ == '__main__':
-  tf.enable_v2_behavior()
   tf.test.main()