Skip to content

Commit fc03f17

Browse files
committed
Add Anomaly types to the dataframe generated by get_anomalies_dataframe
PiperOrigin-RevId: 767192378
1 parent 946dfc7 commit fc03f17

File tree

2 files changed: 10 additions (+10) and 5 deletions (-5).

tensorflow_data_validation/utils/display_util.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,10 +251,14 @@ def get_anomalies_dataframe(anomalies: anomalies_pb2.Anomalies) -> pd.DataFrame:
251251
)
252252
else:
253253
anomaly_info_description = anomaly_info.description
254+
anomaly_types = ('; ').join([
255+
anomalies_pb2.AnomalyInfo.Type.Name(r.type) for r in anomaly_info.reason
256+
])
254257
anomaly_rows.append([
255258
_add_quotes(feature_name),
256259
anomaly_info_short_description,
257260
anomaly_info_description,
261+
anomaly_types,
258262
])
259263
if anomalies.HasField('dataset_anomaly_info'):
260264
if not anomalies.dataset_anomaly_info.short_description:
@@ -286,6 +290,7 @@ def get_anomalies_dataframe(anomalies: anomalies_pb2.Anomalies) -> pd.DataFrame:
286290
'Feature name',
287291
'Anomaly short description',
288292
'Anomaly long description',
293+
'Anomaly types',
289294
],
290295
).set_index('Feature name')
291296
# Do not truncate columns.

tensorflow_data_validation/utils/display_util_test.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -516,8 +516,8 @@ def test_get_anomalies_dataframe(self):
516516
)
517517
actual_output = display_util.get_anomalies_dataframe(anomalies)
518518
# The resulting DataFrame has a row for each feature and a column for each
519-
# of the short description and long description.
520-
self.assertEqual(actual_output.shape, (2, 2))
519+
# of the short description, long description and anomaly types.
520+
self.assertEqual(actual_output.shape, (2, 3))
521521

522522
def test_get_anomalies_dataframe_with_no_toplevel_description(self):
523523
anomalies = text_format.Parse(
@@ -550,8 +550,8 @@ def test_get_anomalies_dataframe_with_no_toplevel_description(self):
550550
)
551551
actual_output = display_util.get_anomalies_dataframe(anomalies)
552552
# The resulting DataFrame has a row for each feature and a column for each
553-
# of the short description and long description.
554-
self.assertEqual(actual_output.shape, (2, 2))
553+
# of the short description, long description and anomaly types.
554+
self.assertEqual(actual_output.shape, (2, 3))
555555

556556
# Confirm Anomaly short/long description is not empty
557557
self.assertNotEmpty(actual_output['Anomaly short description'][0])
@@ -592,7 +592,7 @@ def test_get_drift_skew_dataframe(self):
592592
def test_get_anomalies_dataframe_no_anomalies(self):
593593
anomalies = anomalies_pb2.Anomalies()
594594
actual_output = display_util.get_anomalies_dataframe(anomalies)
595-
self.assertEqual(actual_output.shape, (0, 2))
595+
self.assertEqual(actual_output.shape, (0, 3))
596596

597597
def test_get_natural_language_statistics_dataframes(self):
598598
statistics = text_format.Parse(

0 commit comments

Comments (0)