MannLabs · vuductung · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/tests/test_featureengineering.py b/tests/test_featureengineering.py
@@ -1,6 +1,9 @@
 import unittest
 import numpy as np
 from selectlfq.featureengineering import FeatureEngineering
+from selectlfq.featureengineering import _nan_correlation_w_ref
+import pandas as pd
+import pytest
 
 
 class TestFeatureEngineering(unittest.TestCase):
@@ -44,36 +47,40 @@ def setUp(self):
         self.i = np.array(
             [
                 [0.0, 0.0, 0.0],
-                [
-                    1 / 3,
-                    1 / 3,
-                    1 / 3,
-                ],
+                [1 / 3, 1 / 3, 1 / 3],
                 [1 / 3, 1 / 3, 1 / 3],
             ]
         )
 
-    def test_calculate_mean_distance(self):
-        input = np.array(
-            [
+        # Set up MS1/MS2 fixture data for the correlation tests
+        self.ms1_data = [
+            np.array([1.0, 2.0, 3.0]),  # First precursor MS1 data
+            np.array([2.0, 4.0, 6.0]),  # Second precursor MS1 data
+        ]
+
+        self.ms2_data = [
+            # First precursor MS2 data (2 fragments)
+            np.array(
                 [
-                    1,
-                    2,
-                ],
-                [2, 4],
-            ]
-        )
-        expected_output = np.array(
-            [
+                    [1.0, 2.0, 3.0],  # Fragment 1 - perfect correlation with MS1
+                    [2.0, 4.0, 6.0],  # Fragment 2 - perfect correlation with MS1
+                ]
+            ),
+            # Second precursor MS2 data (3 fragments)
+            np.array(
                 [
-                    0.5,
-                    4.0,
-                ],
-                [0.5, 4.0],
-            ]
-        )
+                    [2.0, 4.0, 6.0],  # Fragment 1 - perfect correlation with MS1
+                    [np.nan, 4.0, 6.0],  # Fragment 2 - partial data
+                    [0.0, 0.0, 0.0],  # Fragment 3 - no correlation (zero variance)
+                ]
+            ),
+        ]
+
+    def test_calculate_mean_distance(self):
+        input = np.array([[1, 2], [2, 4]])
+        expected_output = np.array([[0.5, 1.0]])
 
-        result = self.helper.calculate_mean_distance(input)
+        result = self.helper._calculate_mean_distance(input)
         np.testing.assert_array_equal(result, expected_output)
 
     def test_feature_engineering_mean(self):
@@ -115,12 +122,8 @@ def test_feature_engineering_rank_intensity(self):
         result_axis_0 = self.helper.feature_engineering(
             self.a, axis=0, func="rank_intensity"
         )
-        result_axis_1 = self.helper.feature_engineering(
-            self.b, axis=1, func="rank_intensity"
-        )
 
         self.assertTrue(np.array_equal(result_axis_0, self.e))
-        self.assertTrue(np.array_equal(result_axis_1, self.d))
 
     def test_feature_engineering_SNR(self):
         result_axis_0 = self.helper.feature_engineering(self.b, axis=0, func="SNR")
@@ -148,6 +151,108 @@ def test_feature_engineering_sparsity(self):
         self.assertTrue(np.allclose(result_axis_0, self.h))
         self.assertTrue(np.allclose(result_axis_1, self.i))
 
+    def test_nan_correlation_w_ref(self):
+        # Test case 1: Perfect positive correlation
+        data = np.array(
+            [
+                [1.0, 2.0, 3.0],  # Perfect correlation with ref
+                [2.0, 4.0, 6.0],  # Perfect correlation with ref
+                [np.nan, 2.0, 3.0],  # Partial data
+            ]
+        )
+        ref = np.array([1.0, 2.0, 3.0])
+
+        result = _nan_correlation_w_ref((data, ref))
+
+        np.testing.assert_almost_equal(result[0], 1.0)
+        np.testing.assert_almost_equal(result[1], 1.0)
+        np.testing.assert_almost_equal(result[2], 2 / 3)
+
+    def test_nan_correlation_w_ref_edge_cases(self):
+        # Test case 2: Edge cases
+        data = np.array(
+            [
+                [1.0, 1.0, 1.0],  # Zero standard deviation
+                [np.nan, np.nan, np.nan],  # All NaN
+                [1.0, np.nan, 3.0],  # Partial data: one NaN, two valid points
+            ]
+        )
+        ref = np.array([1.0, 2.0, 3.0])
+
+        result = _nan_correlation_w_ref((data, ref))
+
+        np.testing.assert_almost_equal(result[0], 0.0)
+        np.testing.assert_almost_equal(result[1], 0.0)
+        np.testing.assert_almost_equal(result[2], 2 / 3)
+
+    def test_nan_correlation_w_ref_shape(self):
+        # Test case 3: Different ref shapes
+        data = np.array([[1.0, 2.0, 3.0]])
+        ref = np.array([[1.0, 2.0, 3.0]])  # 2D ref array
+
+        result = _nan_correlation_w_ref((data, ref))
+        np.testing.assert_almost_equal(result[0], 1.0)
+
+    def test_calculate_ms1_ms2_corr_shape(self):
+        """Test the calculation of MS1-MS2 correlations."""
+
+        # Create instance of feature engineering class
+        feat_eng = FeatureEngineering()
+
+        # Calculate correlations
+        result = feat_eng.calculate_ms1_ms2_corr(
+            ms1_data_extracted=self.ms1_data, ms2_data_extracted=self.ms2_data
+        )
+
+        # Assertions
+
+        # Check output type and shape
+        assert isinstance(result, pd.DataFrame)
+        assert result.shape == (5, 3)
+
+    def test_calculate_ms2_ms1_corr_results(self):
+        # Create instance of feature engineering class
+        feat_eng = FeatureEngineering()
+
+        # Calculate correlations
+        result = feat_eng.calculate_ms1_ms2_corr(
+            ms1_data_extracted=self.ms1_data, ms2_data_extracted=self.ms2_data
+        )
+
+        # Check values for first precursor
+        np.testing.assert_almost_equal(
+            result.iloc[0].values,  # First fragment correlations
+            np.array([1.0, 1.0, 1.0]),  # Perfect correlation
+        )
+        np.testing.assert_almost_equal(
+            result.iloc[1].values,  # Second fragment correlations
+            np.array([1.0, 1.0, 1.0]),  # Perfect correlation
+        )
+
+        # Check values for second precursor
+        np.testing.assert_almost_equal(
+            result.iloc[2].values,  # First fragment correlations
+            np.array([1.0, 1.0, 1.0]),  # Perfect correlation
+        )
+        np.testing.assert_almost_equal(
+            result.iloc[3].values,  # Second fragment correlations (partial data)
+            np.array([1.0, 1.0, 1.0]) * (2 / 3),  # Correlation with sparsity weight
+        )
+        np.testing.assert_almost_equal(
+            result.iloc[4].values,  # Third fragment correlations
+            np.array([0.0, 0.0, 0.0]),  # Zero correlation due to zero variance
+        )
+
+    def test_mismatched_lengths(self):
+        """Test handling of mismatched MS1 and MS2 data lengths."""
+        feat_eng = FeatureEngineering()
+
+        ms1_data = [np.array([1.0, 2.0, 3.0])]
+        ms2_data = [np.array([[1.0, 2.0, 3.0]]), np.array([[2.0, 4.0, 6.0]])]
+
+        with pytest.raises(ValueError):
+            feat_eng.calculate_ms1_ms2_corr(ms1_data, ms2_data)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_loss.py b/tests/test_loss.py
@@ -11,103 +11,6 @@ def setUp(self):
         self.tolerance = 1e-4
         self.model = MLP()
 
-    def test_compute_weighted_variance(self):
-        intensities = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        y = torch.tensor([0.5, 1.0, 1.5])
-        output = torch.tensor([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]])
-        kind = "unbiased"
-
-        expected_result = torch.tensor([6.4750, 6.7500, 7.3661])
-
-        result = self.helper._compute_weighted_variance(intensities, y, output)
-
-        self.assertTrue(torch.allclose(result, expected_result, self.tolerance))
-
-    def test_WeightdVarLoss_no_reg(self):
-        intensities = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        y = torch.tensor([0.5, 1.0, 1.5])
-        output = torch.tensor([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]])
-        kind = "unbiased"
-
-        expected_result1 = torch.tensor(6.8637)
-        expected_result2 = torch.tensor(20.5911)
-
-        result1 = self.helper.weighted_var_loss(
-            intensities, y, output, kind, reduction="mean"
-        )
-        result2 = self.helper.weighted_var_loss(
-            intensities, y, output, kind, reduction="sum"
-        )
-
-        self.assertTrue(torch.allclose(result1, expected_result1, self.tolerance))
-        self.assertTrue(torch.allclose(result2, expected_result2, self.tolerance))
-
-    def test_WeightdVarLoss_L1(self):
-        self.helper_l1 = Loss(lambda1=2.0)
-
-        intensities = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        y = torch.tensor([0.5, 1.0, 1.5])
-        output = torch.tensor([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]])
-        weights = torch.tensor([1, 1.4, 1.8])
-
-        kind = "unbiased"
-
-        expected_result1 = torch.tensor(6.8637) + torch.tensor(8.4000)
-        expected_result2 = torch.tensor(20.5911) + torch.tensor(8.4000)
-
-        result1 = self.helper_l1.weightedvarloss(
-            intensities=intensities,
-            y=y,
-            output=output,
-            kind=kind,
-            weights=weights,
-            reduction="mean",
-        )
-        result2 = self.helper_l1.weightedvarloss(
-            intensities=intensities,
-            y=y,
-            output=output,
-            kind=kind,
-            weights=weights,
-            reduction="sum",
-        )
-
-        self.assertTrue(torch.allclose(result1, expected_result1, self.tolerance))
-        self.assertTrue(torch.allclose(result2, expected_result2, self.tolerance))
-
-    def test_WeightdVarLoss_L2(self):
-        self.helper_l2 = Loss(lambda2=2.0)
-
-        intensities = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
-        y = torch.tensor([0.5, 1.0, 1.5])
-        output = torch.tensor([[0.2, 0.3, 0.4], [0.5, 0.6, 0.7]])
-        weights = torch.tensor([1, 1.4, 1.8])
-
-        kind = "unbiased"
-
-        expected_result1 = torch.tensor(6.8637) + torch.tensor(12.4000)
-        expected_result2 = torch.tensor(20.5911) + torch.tensor(12.4000)
-
-        result1 = self.helper_l2.weightedvarloss(
-            intensities=intensities,
-            y=y,
-            output=output,
-            kind=kind,
-            weights=weights,
-            reduction="mean",
-        )
-        result2 = self.helper_l2.weightedvarloss(
-            intensities=intensities,
-            y=y,
-            output=output,
-            kind=kind,
-            weights=weights,
-            reduction="sum",
-        )
-
-        self.assertTrue(torch.allclose(result1, expected_result1, self.tolerance))
-        self.assertTrue(torch.allclose(result2, expected_result2, self.tolerance))
-
     def test_relative_log_fold_change(self):
         a = torch.tensor([[1.0, 2.0, 3.0], [10.0, 11.0, 15.0]])
         expected_result = torch.tensor(