Skip to content

Commit 4f53ec6

Browse files
committed
chore: Improve test coverage
1 parent f8dda7d commit 4f53ec6

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

tests/unit/test_analyze_columns_pandas.py

Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,34 @@ def test_histogram_timedelta_conversion(self):
334334
self.assertIsNotNone(result[0].stats.histogram)
335335
self.assertEqual(len(result[0].stats.histogram), 10)
336336

337+
def test_histogram_large_integer_edge_case(self):
    """Check that analyze_columns returns stats for integers near 10**15.

    The column holds three consecutive integers starting at 10**15; the
    concern being guarded against is an IndexError while building the
    histogram for such values.
    """
    base = 10**15
    frame = pd.DataFrame({"col1": [base + offset for offset in range(3)]})

    column_results = analyze_columns(frame)

    # The only requirement is that a stats object comes back (no crash).
    self.assertIsNotNone(column_results[0].stats)
346+
347+
def test_histogram_single_unique_value_int(self):
    """Check a constant integer column (fifty copies of 100).

    Every value is identical, so the data range is zero.
    """
    constant_values = [100 for _ in range(50)]
    frame = pd.DataFrame({"col1": constant_values})

    first_column = analyze_columns(frame)[0]

    # A zero-width range must still yield stats reporting one unique value.
    self.assertIsNotNone(first_column.stats)
    self.assertEqual(first_column.stats.unique_count, 1)
355+
356+
def test_histogram_single_unique_value_float(self):
    """Check a constant float column (fifty copies of 3.14159).

    Every value is identical, so the data range is zero.
    """
    constant_values = [3.14159 for _ in range(50)]
    frame = pd.DataFrame({"col1": constant_values})

    first_column = analyze_columns(frame)[0]

    # A zero-width range must still yield stats reporting one unique value.
    self.assertIsNotNone(first_column.stats)
    self.assertEqual(first_column.stats.unique_count, 1)
364+
337365

338366
class TestAnalyzeColumnsPerformanceBudget(unittest.TestCase):
339367
def test_within_budget_all_columns_analyzed(self):
@@ -490,6 +518,28 @@ def test_min_max_datetime(self):
490518
self.assertTrue("2020-01-01" in result[0].stats.min)
491519
self.assertTrue("2020-01-05" in result[0].stats.max)
492520

521+
def test_min_max_non_comparable_objects(self):
    """Check that min/max are None for an object column of dicts.

    Dicts do not support ordering comparisons, so no meaningful min or
    max exists for this column.
    """
    # NOTE(review): intended to exercise the TypeError/ValueError handling
    # around min/max; whether an object-dtype column of dicts actually
    # reaches that handler (rather than an earlier non-numeric branch)
    # cannot be confirmed from this test alone - verify against
    # analyze_columns.
    dict_values = pd.array([{"a": 1}, {"b": 2}, {"c": 3}], dtype=object)
    frame = pd.DataFrame({"col1": dict_values})

    first_column = analyze_columns(frame)[0]

    # Neither bound should be reported for unorderable values.
    self.assertIsNone(first_column.stats.min)
    self.assertIsNone(first_column.stats.max)
531+
532+
def test_min_max_object_dtype_non_numeric(self):
    """Check a string column explicitly stored with object dtype.

    Expects no min/max to be reported and categories to be populated.
    """
    string_values = pd.array(list("xyz"), dtype=object)
    frame = pd.DataFrame({"col1": string_values})

    first_column = analyze_columns(frame)[0]

    # Non-numeric data: min and max are absent...
    self.assertIsNone(first_column.stats.min)
    self.assertIsNone(first_column.stats.max)
    # ...and the column is summarised through categories instead.
    self.assertIsNotNone(first_column.stats.categories)
542+
493543

494544
class TestAnalyzeColumnsMultipleTypes(unittest.TestCase):
495545
def test_multiple_numeric_and_string_columns(self):

0 commit comments

Comments
 (0)