Merge pull request ipython#259 from minrk/bytes

always b64-encode bytes objects on Python 3
Carreau · Aug 10, 2017 · 4717ca5 · 4717ca5
2 parents de7b8a3 + af9d4b8
commit 4717ca5
Show file tree

Hide file tree

Showing 3 changed files with 40 additions and 29 deletions.
diff --git a/ipykernel/displayhook.py b/ipykernel/displayhook.py
@@ -68,7 +68,7 @@ def write_output_prompt(self):
         self.msg['content']['execution_count'] = self.prompt_count
 
     def write_format_data(self, format_dict, md_dict=None):
-        self.msg['content']['data'] = encode_images(format_dict)
+        self.msg['content']['data'] = json_clean(encode_images(format_dict))
         self.msg['content']['metadata'] = md_dict
 
     def finish_displayhook(self):

diff --git a/ipykernel/jsonutil.py b/ipykernel/jsonutil.py
@@ -3,18 +3,13 @@
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+from binascii import b2a_base64
 import math
 import re
 import types
 from datetime import datetime
 import numbers
 
-try:
-    # base64.encodestring is deprecated in Python 3.x
-    from base64 import encodebytes
-except ImportError:
-    # Python 2.x
-    from base64 import encodestring as encodebytes
 
 from ipython_genutils import py3compat
 from ipython_genutils.py3compat import unicode_type, iteritems
@@ -71,20 +66,27 @@ def encode_images(format_dict):
         is base64-encoded.
 
     """
+
+    # no need for handling of ambiguous bytestrings on Python 3,
+    # where bytes objects always represent binary data and are thus
+    # always base64-encoded.
+    if py3compat.PY3:
+        return format_dict
+
     encoded = format_dict.copy()
 
     pngdata = format_dict.get('image/png')
     if isinstance(pngdata, bytes):
         # make sure we don't double-encode
         if not pngdata.startswith(PNG64):
-            pngdata = encodebytes(pngdata)
+            pngdata = b2a_base64(pngdata)
         encoded['image/png'] = pngdata.decode('ascii')
 
     jpegdata = format_dict.get('image/jpeg')
     if isinstance(jpegdata, bytes):
         # make sure we don't double-encode
         if not jpegdata.startswith(JPEG64):
-            jpegdata = encodebytes(jpegdata)
+            jpegdata = b2a_base64(jpegdata)
         encoded['image/jpeg'] = jpegdata.decode('ascii')
 
     gifdata = format_dict.get('image/gif')
@@ -98,7 +100,7 @@ def encode_images(format_dict):
     if isinstance(pdfdata, bytes):
         # make sure we don't double-encode
         if not pdfdata.startswith(PDF64):
-            pdfdata = encodebytes(pdfdata)
+            pdfdata = b2a_base64(pdfdata)
         encoded['application/pdf'] = pdfdata.decode('ascii')
 
     return encoded
@@ -151,9 +153,21 @@ def json_clean(obj):
 
     if isinstance(obj, atomic_ok):
         return obj
-
+    
     if isinstance(obj, bytes):
-        return obj.decode(DEFAULT_ENCODING, 'replace')
+        if py3compat.PY3:
+            # unambiguous binary data is base64-encoded
+            # (this probably should have happened upstream)
+            return b2a_base64(obj).decode('ascii')
+        else:
+            # Python 2 bytestr is ambiguous,
+            # needs special handling for possible binary bytestrings.
+            # imperfect workaround: if ascii, assume text.
+            # otherwise assume binary, base64-encode (py3 behavior).
+            try:
+                return obj.decode('ascii')
+            except UnicodeDecodeError:
+                return b2a_base64(obj).decode('ascii')
 
     if isinstance(obj, container_to_list) or (
         hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):

diff --git a/ipykernel/tests/test_jsonutil.py b/ipykernel/tests/test_jsonutil.py
@@ -4,13 +4,8 @@
 # Copyright (c) IPython Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+from binascii import a2b_base64
 import json
-import sys
-
-if sys.version_info < (3,):
-    from base64 import decodestring as decodebytes
-else:
-    from base64 import decodebytes
 
 from datetime import datetime
 import numbers
@@ -19,7 +14,7 @@
 
 from .. import jsonutil
 from ..jsonutil import json_clean, encode_images
-from ipython_genutils.py3compat import unicode_to_str, str_to_bytes, iteritems
+from ipython_genutils.py3compat import unicode_to_str
 
 class MyInt(object):
     def __int__(self):
@@ -70,28 +65,30 @@ def test_encode_images():
     pngdata = b'\x89PNG\r\n\x1a\nblahblahnotactuallyvalidIEND\xaeB`\x82'
     jpegdata = b'\xff\xd8\xff\xe0\x00\x10JFIFblahblahjpeg(\xa0\x0f\xff\xd9'
     pdfdata = b'%PDF-1.\ntrailer<</Root<</Pages<</Kids[<</MediaBox[0 0 3 3]>>]>>>>>>'
+    bindata = b'\xff\xff\xff\xff'
 
     fmt = {
         'image/png'  : pngdata,
         'image/jpeg' : jpegdata,
-        'application/pdf' : pdfdata
+        'application/pdf' : pdfdata,
+        'application/unrecognized': bindata,
     }
-    encoded = encode_images(fmt)
-    for key, value in iteritems(fmt):
+    encoded = json_clean(encode_images(fmt))
+    for key, value in fmt.items():
         # encoded has unicode, want bytes
-        decoded = decodebytes(encoded[key].encode('ascii'))
+        decoded = a2b_base64(encoded[key])
         nt.assert_equal(decoded, value)
-    encoded2 = encode_images(encoded)
+    encoded2 = json_clean(encode_images(encoded))
     nt.assert_equal(encoded, encoded2)
 
+    # test that we don't double-encode base64 str
     b64_str = {}
-    for key, encoded in iteritems(encoded):
+    for key, encoded in encoded.items():
         b64_str[key] = unicode_to_str(encoded)
-    encoded3 = encode_images(b64_str)
+    encoded3 = json_clean(encode_images(b64_str))
     nt.assert_equal(encoded3, b64_str)
-    for key, value in iteritems(fmt):
-        # encoded3 has str, want bytes
-        decoded = decodebytes(str_to_bytes(encoded3[key]))
+    for key, value in fmt.items():
+        decoded = a2b_base64(encoded3[key])
         nt.assert_equal(decoded, value)
 
 def test_lambda():