Implement AUC-PR in Kaun metrics (#131)

Shocker444 · tmattio · web-flow · commit 82d0daaacc25 · 2025-10-27T15:46:50.000+05:30
Co-authored-by: Thibaut Mattio <thibaut.mattio@gmail.com>
diff --git a/CHANGES.md b/CHANGES.md
@@ -56,7 +56,7 @@ All notable changes to this project will be documented in this file.
 - Allow metric history to tolerate metrics that appear or disappear between epochs so dynamic metric sets no longer raise during training (@tmattio)
 - Make `Optimizer.clip_by_global_norm` robust to zero gradients and empty parameter trees to avoid NaNs during training (@tmattio)
 - Split CSV loader into `from_csv` and `from_csv_with_labels` to retain labels when requested (#114, @Satarupa22-SD)
-- Implement AUC-ROC in Kaun metrics and simplify its API (#109 @Shocker444)
+- Implement AUC-ROC and AUC-PR in Kaun metrics and simplify their signatures (#109, #131, @Shocker444)
 
 ### Talon
 
diff --git a/kaun/lib/kaun/metrics.ml b/kaun/lib/kaun/metrics.ml
@@ -32,6 +32,43 @@ type 'layout metric_fn =
 let scalar_tensor dtype value = Rune.scalar dtype value
 let ones_like t = Rune.ones (Rune.dtype t) (Rune.shape t)
 
+(** [accumulate_rank_metric_state metric_name state ~predictions ~targets
+    ?weights ()] flattens the batch, casts it to the dtype of the accumulated
+    predictions (the batch's own dtype when there are none), and appends it to
+    [state]. Omitted [weights] become ones; a malformed [state] raises. *)
+let accumulate_rank_metric_state metric_name state ~predictions ~targets
+    ?weights () =
+  let flatten t = Rune.reshape [| -1 |] t in
+  let flat_preds = flatten predictions in
+  let flat_targets = flatten targets in
+  let dtype_source = match state with [ acc; _; _ ] -> acc | _ -> flat_preds in
+  let dtype = Rune.dtype dtype_source in
+  let batch_preds = Rune.cast dtype flat_preds in
+  let batch_targets = Rune.cast dtype flat_targets in
+  let batch_weights =
+    match weights with
+    | None -> ones_like batch_preds
+    | Some w -> Rune.cast dtype (flatten w)
+  in
+  let batch = [ batch_preds; batch_targets; batch_weights ] in
+  let append acc part = Rune.concatenate ~axis:0 [ acc; part ] in
+  match state with
+  | [] -> batch
+  | [ _; _; _ ] -> List.map2 append state batch
+  | _ -> failwith (Printf.sprintf "Invalid %s state" metric_name)
+
+(** [prepare_rank_curve_inputs preds targets weights] orders samples by
+    descending [preds] and returns, in that order, the weighted positives
+    [targets * weights] and weighted negatives [(1 - targets) * weights]. *)
+let prepare_rank_curve_inputs preds targets weights =
+  let order = Rune.argsort ~axis:0 ~descending:true preds in
+  let by_rank t = Rune.take_along_axis ~axis:0 order t in
+  let ranked_targets = by_rank targets in
+  let ranked_weights = by_rank weights in
+  let ones = Rune.ones (Rune.dtype preds) (Rune.shape ranked_targets) in
+  let complement = Rune.sub ones ranked_targets in
+  (Rune.mul ranked_targets ranked_weights, Rune.mul complement ranked_weights)
+
 (** Core metric operations *)
 
 let update metric ~predictions ~targets ?weights () =
@@ -300,45 +337,15 @@ let auc_roc () =
   create_custom ~name:"auc_roc"
     ~init:(fun () -> [])
     ~update:(fun state ~predictions ~targets ?weights () ->
-      let predictions = Rune.reshape [| -1 |] predictions in
-      let targets = Rune.reshape [| -1 |] targets in
-      let dtype =
-        match state with
-        | [ preds_acc; _; _ ] -> Rune.dtype preds_acc
-        | _ -> Rune.dtype predictions
-      in
-      let predictions = Rune.cast dtype predictions in
-      let targets = Rune.cast dtype targets in
-      let weights =
-        match weights with
-        | Some w -> Rune.cast dtype (Rune.reshape [| -1 |] w)
-        | None -> Rune.ones dtype (Rune.shape predictions)
-      in
-      match state with
-      | [] -> [ predictions; targets; weights ]
-      | [ preds_acc; targets_acc; weights_acc ] ->
-          let preds_acc = Rune.concatenate ~axis:0 [ preds_acc; predictions ] in
-          let targets_acc = Rune.concatenate ~axis:0 [ targets_acc; targets ] in
-          let weights_acc = Rune.concatenate ~axis:0 [ weights_acc; weights ] in
-          [ preds_acc; targets_acc; weights_acc ]
-      | _ -> failwith "Invalid auc_roc state")
+      accumulate_rank_metric_state "auc_roc" state ~predictions ~targets
+        ?weights ())
     ~compute:(fun state ->
       match state with
       | [ preds; targets; weights ] ->
-          let dtype = Rune.dtype preds in
-          let ones = Rune.ones dtype (Rune.shape targets) in
-          let sorted_idx = Rune.argsort ~axis:0 ~descending:true preds in
-          let sorted_targets =
-            Rune.take_along_axis ~axis:0 sorted_idx targets
-          in
-          let sorted_weights =
-            Rune.take_along_axis ~axis:0 sorted_idx weights
-          in
-
-          let positives = Rune.mul sorted_targets sorted_weights in
-          let negatives =
-            Rune.mul (Rune.sub ones sorted_targets) sorted_weights
+          let positives, negatives =
+            prepare_rank_curve_inputs preds targets weights
           in
+          let dtype = Rune.dtype positives in
 
           let cum_tp = Rune.cumsum ~axis:0 positives in
           let cum_fp = Rune.cumsum ~axis:0 negatives in
@@ -380,13 +387,54 @@ let auc_roc () =
       | _ -> failwith "Invalid auc_roc state")
     ~reset:(fun _ -> [])
 
-let auc_pr ?(num_thresholds = 200) ?(curve = false) () =
-  let _ = num_thresholds in
-  let _ = curve in
+(** [auc_pr ()] accumulates every batch and, on compute, returns the average
+    precision [sum_k (recall_k - recall_{k-1}) * precision_k] over samples ranked
+    by descending prediction. Each sample acts as its own threshold, so tied
+    predictions contribute in [argsort] order. Any state other than three
+    tensors, including the initial [[]], fails with "Invalid auc_pr state". *)
+let auc_pr () =
   create_custom ~name:"auc_pr"
     ~init:(fun () -> [])
-    ~update:(fun state ~predictions:_ ~targets:_ ?weights:_ () -> state)
-    ~compute:(fun _ -> failwith "AUC-PR not yet implemented")
+    ~update:(fun state ~predictions ~targets ?weights () ->
+      accumulate_rank_metric_state "auc_pr" state ~predictions ~targets ?weights
+        ())
+    ~compute:(fun state ->
+      match state with
+      | [ preds; targets; weights ] ->
+          let positives, negatives =
+            prepare_rank_curve_inputs preds targets weights
+          in
+          let dtype = Rune.dtype positives in
+          let n = Rune.size positives in
+
+          (* No accumulated samples: the curve is empty and its area is 0. *)
+          if n < 1 then scalar_tensor dtype 0.0
+          else
+            (* [eps] keeps both ratios finite when a denominator is zero. *)
+            let eps = scalar_tensor dtype 1e-7 in
+
+            let cum_tp = Rune.cumsum ~axis:0 positives in
+            let cum_fp = Rune.cumsum ~axis:0 negatives in
+
+            (* precision_k = TP_k / (TP_k + FP_k). *)
+            let precision =
+              Rune.div cum_tp (Rune.add (Rune.add cum_tp cum_fp) eps)
+            in
+
+            (* TP_k + FN_k equals the total positive weight at every k, so recall
+               needs no separate false-negative series. *)
+            let total_pos = Rune.sum positives in
+            let recall = Rune.div cum_tp (Rune.add total_pos eps) in
+
+            (* Recall is 0 before the first sample is predicted positive; shift
+               [recall] right by one to obtain recall_{k-1}. *)
+            let zero = Rune.reshape [| 1 |] (scalar_tensor dtype 0.0) in
+            let shifted = Rune.concatenate ~axis:0 [ zero; recall ] in
+            let prev_recall = Rune.slice [ Rune.R (0, n) ] shifted in
+            let dx = Rune.sub recall prev_recall in
+
+            Rune.sum (Rune.mul dx precision)
+      | _ -> failwith "Invalid auc_pr state")
     ~reset:(fun _ -> [])
 
 let confusion_matrix ~num_classes ?(normalize = `None) () =
diff --git a/kaun/lib/kaun/metrics.mli b/kaun/lib/kaun/metrics.mli
@@ -122,10 +122,13 @@ val auc_roc : unit -> 'layout t
     Computes the exact ROC integral by sorting predictions and accumulating
     true/false positive rates across all seen batches. *)
 
-val auc_pr : ?num_thresholds:int -> ?curve:bool -> unit -> 'layout t
-(** [auc_pr ?num_thresholds ?curve ()] creates an AUC-PR metric.
+val auc_pr : unit -> 'layout t
+(** [auc_pr ()] creates an AUC-PR metric.
 
-    Area Under the Precision-Recall Curve. *)
+    Area Under the Precision-Recall Curve.
+
+    Sorts the predictions from all seen batches and sums precision over each
+    increase in recall (average precision, with one threshold per sample). *)
 
 val confusion_matrix :
   num_classes:int ->
diff --git a/kaun/test/test_metrics.ml b/kaun/test/test_metrics.ml
@@ -113,6 +113,41 @@ let test_auc_roc_multiple_updates () =
 
   check (tensor_testable 1e-5) "auc roc incremental" full_result chunked_result
 
+let test_auc_pr () =
+  let check_ap name scores labels expected =
+    let predictions = Rune.create Rune.float32 [| 4 |] scores in
+    let targets = Rune.create Rune.float32 [| 4 |] labels in
+    let auc = Metrics.auc_pr () in
+    Metrics.update auc ~predictions ~targets ();
+    let expected = Rune.scalar Rune.float32 expected in
+    check (tensor_testable 1e-5) name expected (Metrics.compute auc)
+  in
+  (* Separable ranking -> 1; hits at ranks 1 and 3 -> 0.5 * 1 + 0.5 * 2/3. *)
+  check_ap "auc pr" [| 0.8; 0.7; 0.6; 0.3 |] [| 1.; 1.; 0.; 0. |] 1.0;
+  check_ap "auc pr 5/6" [| 0.9; 0.8; 0.7; 0.6 |] [| 1.; 0.; 1.; 0. |] (5. /. 6.)
+
+(* One update or two chunked updates must produce the same AUC-PR. *)
+let test_auc_pr_multiple_updates () =
+  let dtype = Rune.float32 in
+  let auc_of batches =
+    let metric = Metrics.auc_pr () in
+    List.iter
+      (fun (scores, labels) ->
+        let len = Array.length scores in
+        let predictions = Rune.create dtype [| len |] scores in
+        let targets = Rune.create dtype [| len |] labels in
+        Metrics.update metric ~predictions ~targets ())
+      batches;
+    Metrics.compute metric
+  in
+  let full_result =
+    auc_of [ ([| 0.8; 0.7; 0.6; 0.3 |], [| 1.; 1.; 0.; 0. |]) ]
+  in
+  let chunked_result =
+    auc_of [ ([| 0.8; 0.7 |], [| 1.; 1. |]); ([| 0.6; 0.3 |], [| 0.; 0. |]) ]
+  in
+  check (tensor_testable 1e-5) "auc pr incremental" full_result chunked_result
+
 let test_confusion_matrix () =
   let dtype = Rune.float32 in
 
@@ -414,6 +449,9 @@ let () =
           test_case "auc_roc" `Quick test_auc_roc;
           test_case "auc_roc_multiple_updates" `Quick
             test_auc_roc_multiple_updates;
+          test_case "auc_pr" `Quick test_auc_pr;
+          test_case "auc_pr_multiple_updates" `Quick
+            test_auc_pr_multiple_updates;
           test_case "confusion_matrix" `Quick test_confusion_matrix;
         ] );
       ( "regression",