Add an 'Anomaly types' column to the DataFrame generated by get_anomalies_dataframe

tfx-copybara · tfx-copybara · commit fc03f1726113 · 2025-06-04T10:12:49.000-07:00
PiperOrigin-RevId: 767192378
diff --git a/tensorflow_data_validation/utils/display_util.py b/tensorflow_data_validation/utils/display_util.py
@@ -251,10 +251,14 @@ def get_anomalies_dataframe(anomalies: anomalies_pb2.Anomalies) -> pd.DataFrame:
       )
     else:
       anomaly_info_description = anomaly_info.description
+    anomaly_types = ('; ').join([
+        anomalies_pb2.AnomalyInfo.Type.Name(r.type) for r in anomaly_info.reason
+    ])
     anomaly_rows.append([
         _add_quotes(feature_name),
         anomaly_info_short_description,
         anomaly_info_description,
+        anomaly_types,
     ])
   if anomalies.HasField('dataset_anomaly_info'):
     if not anomalies.dataset_anomaly_info.short_description:
@@ -286,6 +290,7 @@ def get_anomalies_dataframe(anomalies: anomalies_pb2.Anomalies) -> pd.DataFrame:
           'Feature name',
           'Anomaly short description',
           'Anomaly long description',
+          'Anomaly types',
       ],
   ).set_index('Feature name')
   # Do not truncate columns.
diff --git a/tensorflow_data_validation/utils/display_util_test.py b/tensorflow_data_validation/utils/display_util_test.py
@@ -516,8 +516,8 @@ def test_get_anomalies_dataframe(self):
     )
     actual_output = display_util.get_anomalies_dataframe(anomalies)
     # The resulting DataFrame has a row for each feature and a column for each
-    # of the short description and long description.
-    self.assertEqual(actual_output.shape, (2, 2))
+    # of the short description, long description and anomaly types.
+    self.assertEqual(actual_output.shape, (2, 3))
 
   def test_get_anomalies_dataframe_with_no_toplevel_description(self):
     anomalies = text_format.Parse(
@@ -550,8 +550,8 @@ def test_get_anomalies_dataframe_with_no_toplevel_description(self):
     )
     actual_output = display_util.get_anomalies_dataframe(anomalies)
     # The resulting DataFrame has a row for each feature and a column for each
-    # of the short description and long description.
-    self.assertEqual(actual_output.shape, (2, 2))
+    # of the short description, long description and anomaly types.
+    self.assertEqual(actual_output.shape, (2, 3))
 
     # Confirm Anomaly short/long description is not empty
     self.assertNotEmpty(actual_output['Anomaly short description'][0])
@@ -592,7 +592,7 @@ def test_get_drift_skew_dataframe(self):
   def test_get_anomalies_dataframe_no_anomalies(self):
     anomalies = anomalies_pb2.Anomalies()
     actual_output = display_util.get_anomalies_dataframe(anomalies)
-    self.assertEqual(actual_output.shape, (0, 2))
+    self.assertEqual(actual_output.shape, (0, 3))
 
   def test_get_natural_language_statistics_dataframes(self):
     statistics = text_format.Parse(