Skip to content

Commit 3dba348

Browse files
committed
fix: Don't show bytes as base64 encoded strings
1 parent 4f53ec6 commit 3dba348

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

deepnote_toolkit/ocelots/pandas/utils.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import base64
2-
31
import numpy as np
42
import pandas as pd
53
from packaging.requirements import Requirement
@@ -8,8 +6,12 @@
86

97

108
def safe_convert_to_string(value):
11-
if isinstance(value, bytes):
12-
return base64.b64encode(value).decode("ascii")
9+
"""
10+
Safely convert a value to string, handling cases where str() might fail.
11+
12+
Note: For bytes, this returns Python's standard string representation (e.g., b'hello')
13+
rather than a base64 encoding, because the standard representation is more human-readable.
14+
"""
1315
try:
1416
return str(value)
1517
except Exception:

tests/unit/test_analyze_columns_pandas.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,17 +280,17 @@ def test_categories_with_binary_data(self):
280280
)
281281
result = analyze_columns(df)
282282

283-
base64_hello = "aGVsbG8="
283+
str_hello = "b'hello'"
284284
self.assertIsNotNone(result[0].stats)
285285
self.assertEqual(result[0].stats.unique_count, 4)
286286
self.assertIsNotNone(result[0].stats.categories)
287287
self.assertEqual(len(result[0].stats.categories), 3)
288288
category_names = [cat["name"] for cat in result[0].stats.categories]
289-
self.assertIn(base64_hello, category_names)
289+
self.assertIn(str_hello, category_names)
290290
hello_count = next(
291291
cat["count"]
292292
for cat in result[0].stats.categories
293-
if cat["name"] == base64_hello
293+
if cat["name"] == str_hello
294294
)
295295
self.assertEqual(hello_count, 2)
296296
has_others = any("others" in cat["name"] for cat in result[0].stats.categories)
@@ -522,7 +522,9 @@ def test_min_max_non_comparable_objects(self):
522522
"""Test TypeError/ValueError handling."""
523523
# Create a column with non-comparable objects that pass numeric check
524524
# Using object dtype with mixed incomparable types
525-
df = pd.DataFrame({"col1": pd.array([{"a": 1}, {"b": 2}, {"c": 3}], dtype=object)})
525+
df = pd.DataFrame(
526+
{"col1": pd.array([{"a": 1}, {"b": 2}, {"c": 3}], dtype=object)}
527+
)
526528
result = analyze_columns(df)
527529

528530
# Should handle the error gracefully and return None for min/max

tests/unit/test_ocelots.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import base64
21
import io
32
import unittest
43
import warnings
@@ -183,7 +182,7 @@ def test_to_records_json(self, df: DataFrame):
183182
self.assertEqual(first_row["list"], "[1, 2, 3]")
184183
self.assertEqual(first_row["datetime"], "2023-01-01 12:00:00")
185184

186-
expected_hello = base64.b64encode(b"hello").decode("ascii")
185+
expected_hello = "b'hello'"
187186
self.assertEqual(first_row["binary"], expected_hello)
188187

189188
@_test_with_all_backends(testing_dataframes["many_rows_10k"])

0 commit comments

Comments
 (0)