This repository has been archived by the owner on Nov 22, 2022. It is now read-only.

Performance improvement in metrics calculations (#409)
Summary:
Pull Request resolved: #409

Metrics calculation for doc classification takes much longer than the training itself: 34x the training time!

This simple diff significantly improves the metrics time by fixing a piece of low-hanging fruit: implicit bool->int casting in the inner loops.
Metrics still account for the majority of the run time (down to 9x the training time), so further improvements are needed.
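The change replaces per-item arithmetic on Python bools (which coerces bool to int on every iteration) with an explicit branch that adds a plain int. A toy sketch of the two equivalent forms, with hypothetical data not taken from the PR:

```python
# Two equivalent ways to count True values in a list of bools.
labels = [True, False, True, True, False]

# Before: implicit bool -> int coercion on every iteration.
count_cast = 0
for label in labels:
    count_cast += label  # each += converts the bool operand to an int

# After: branch on the bool and add a plain int.
count_branch = 0
for label in labels:
    if label:
        count_branch += 1

assert count_cast == count_branch == 3
```

Both forms compute the same count; the branch avoids repeated coercion work in a hot loop, which is the effect the commit summary describes.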

Reviewed By: borguz

Differential Revision: D14474255

fbshipit-source-id: 55a4448ce73cc2c9dc61e98e52862d642167b752
Titousensei authored and facebook-github-bot committed Mar 19, 2019
1 parent c6cf384 commit a22981a
1 changed file: pytext/metrics/__init__.py (8 additions, 3 deletions)
diff --git a/pytext/metrics/__init__.py b/pytext/metrics/__init__.py
--- a/pytext/metrics/__init__.py
+++ b/pytext/metrics/__init__.py
@@ -453,6 +453,8 @@ def average_precision_score(
 
     Returns:
         Average precision score.
+
+    TODO: This is too slow, improve the performance
     """
     ap = 0.0
     tp = 0
@@ -462,7 +464,8 @@ def average_precision_score(
     added_positives = 0
 
     for k, (label, score) in enumerate(zip(y_true_sorted, y_score_sorted)):
-        added_positives += label
+        if label:
+            added_positives += 1
         if score != threshold:
             threshold = score
             recall_diff = added_positives / total_positive
@@ -599,8 +602,10 @@ def compute_roc_auc(
     n_correct_pair_order = 0
 
     for y in reversed(y_true_sorted):  # want low predicted to high predicted
-        n_false += 1 - y
-        n_correct_pair_order += y * n_false
+        if y:
+            n_correct_pair_order += n_false
+        else:
+            n_false += 1
 
     n_true = len(y_true) - n_false
     if n_true == 0 or n_false == 0:
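The `compute_roc_auc` hunk counts, for each positive example (scanning from lowest to highest predicted score), how many negatives rank below it; the sum is the number of correctly ordered positive/negative pairs. A self-contained sketch of that pair-counting idea, assuming the standard normalization `n_correct / (n_true * n_false)` (the normalization itself lies outside the hunk shown) and ignoring score ties:

```python
def roc_auc(y_true, y_score):
    """AUC as the fraction of correctly ordered (positive, negative) pairs."""
    # Sort labels by descending score, then scan from low to high score.
    y_true_sorted = [y for _, y in sorted(zip(y_score, y_true), reverse=True)]
    n_false = 0
    n_correct_pair_order = 0
    for y in reversed(y_true_sorted):  # low predicted to high predicted
        if y:
            # Every negative seen so far has a lower score than this positive.
            n_correct_pair_order += n_false
        else:
            n_false += 1
    n_true = len(y_true) - n_false
    if n_true == 0 or n_false == 0:
        return None  # AUC undefined with only one class
    return n_correct_pair_order / (n_true * n_false)

# 3 positives, 2 negatives; the only mis-ordered pair is the
# positive at 0.6 ranked below the negative at 0.7.
assert roc_auc([1, 1, 0, 1, 0], [0.9, 0.8, 0.7, 0.6, 0.5]) == 5 / 6
```

This single pass is O(n log n) overall (dominated by the sort), versus O(n_true * n_false) for comparing every pair explicitly, which is why the loop is written this way.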
