fix: Don't show bytes as base64 encoded strings

m1so · m1so · commit 3dba348021fa · 2025-11-06T22:56:16.000+01:00
diff --git a/deepnote_toolkit/ocelots/pandas/utils.py b/deepnote_toolkit/ocelots/pandas/utils.py
@@ -1,5 +1,3 @@
-import base64
-
 import numpy as np
 import pandas as pd
 from packaging.requirements import Requirement
@@ -8,8 +6,12 @@
 
 
 def safe_convert_to_string(value):
-    if isinstance(value, bytes):
-        return base64.b64encode(value).decode("ascii")
+    """
+    Safely convert a value to string, handling cases where str() might fail.
+
+    Note: Bytes are rendered using Python's standard string representation
+    (e.g., b'hello'), which is more human-readable than base64 encoding.
+    """
     try:
         return str(value)
     except Exception:
diff --git a/tests/unit/test_analyze_columns_pandas.py b/tests/unit/test_analyze_columns_pandas.py
@@ -280,17 +280,17 @@ def test_categories_with_binary_data(self):
         )
         result = analyze_columns(df)
 
-        base64_hello = "aGVsbG8="
+        str_hello = "b'hello'"
         self.assertIsNotNone(result[0].stats)
         self.assertEqual(result[0].stats.unique_count, 4)
         self.assertIsNotNone(result[0].stats.categories)
         self.assertEqual(len(result[0].stats.categories), 3)
         category_names = [cat["name"] for cat in result[0].stats.categories]
-        self.assertIn(base64_hello, category_names)
+        self.assertIn(str_hello, category_names)
         hello_count = next(
             cat["count"]
             for cat in result[0].stats.categories
-            if cat["name"] == base64_hello
+            if cat["name"] == str_hello
         )
         self.assertEqual(hello_count, 2)
         has_others = any("others" in cat["name"] for cat in result[0].stats.categories)
@@ -522,7 +522,9 @@ def test_min_max_non_comparable_objects(self):
         """Test TypeError/ValueError handling."""
         # Create a column with non-comparable objects that pass numeric check
         # Using object dtype with mixed incomparable types
-        df = pd.DataFrame({"col1": pd.array([{"a": 1}, {"b": 2}, {"c": 3}], dtype=object)})
+        df = pd.DataFrame(
+            {"col1": pd.array([{"a": 1}, {"b": 2}, {"c": 3}], dtype=object)}
+        )
         result = analyze_columns(df)
 
         # Should handle the error gracefully and return None for min/max
diff --git a/tests/unit/test_ocelots.py b/tests/unit/test_ocelots.py
@@ -1,4 +1,3 @@
-import base64
 import io
 import unittest
 import warnings
@@ -183,7 +182,7 @@ def test_to_records_json(self, df: DataFrame):
         self.assertEqual(first_row["list"], "[1, 2, 3]")
         self.assertEqual(first_row["datetime"], "2023-01-01 12:00:00")
 
-        expected_hello = base64.b64encode(b"hello").decode("ascii")
+        expected_hello = "b'hello'"
         self.assertEqual(first_row["binary"], expected_hello)
 
     @_test_with_all_backends(testing_dataframes["many_rows_10k"])