Skip to content

Commit

Permalink
Merge pull request ipython#259 from minrk/bytes
Browse files Browse the repository at this point in the history
always b64-encode bytes objects on Python 3
  • Loading branch information
takluyver authored Aug 10, 2017
2 parents de7b8a3 + af9d4b8 commit 4717ca5
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 29 deletions.
2 changes: 1 addition & 1 deletion ipykernel/displayhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def write_output_prompt(self):
self.msg['content']['execution_count'] = self.prompt_count

def write_format_data(self, format_dict, md_dict=None):
self.msg['content']['data'] = encode_images(format_dict)
self.msg['content']['data'] = json_clean(encode_images(format_dict))
self.msg['content']['metadata'] = md_dict

def finish_displayhook(self):
Expand Down
36 changes: 25 additions & 11 deletions ipykernel/jsonutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,13 @@
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

from binascii import b2a_base64
import math
import re
import types
from datetime import datetime
import numbers

try:
# base64.encodestring is deprecated in Python 3.x
from base64 import encodebytes
except ImportError:
# Python 2.x
from base64 import encodestring as encodebytes

from ipython_genutils import py3compat
from ipython_genutils.py3compat import unicode_type, iteritems
Expand Down Expand Up @@ -71,20 +66,27 @@ def encode_images(format_dict):
is base64-encoded.
"""

# no need for handling of ambiguous bytestrings on Python 3,
# where bytes objects always represent binary data and are thus
# always base64-encoded.
if py3compat.PY3:
return format_dict

encoded = format_dict.copy()

pngdata = format_dict.get('image/png')
if isinstance(pngdata, bytes):
# make sure we don't double-encode
if not pngdata.startswith(PNG64):
pngdata = encodebytes(pngdata)
pngdata = b2a_base64(pngdata)
encoded['image/png'] = pngdata.decode('ascii')

jpegdata = format_dict.get('image/jpeg')
if isinstance(jpegdata, bytes):
# make sure we don't double-encode
if not jpegdata.startswith(JPEG64):
jpegdata = encodebytes(jpegdata)
jpegdata = b2a_base64(jpegdata)
encoded['image/jpeg'] = jpegdata.decode('ascii')

gifdata = format_dict.get('image/gif')
Expand All @@ -98,7 +100,7 @@ def encode_images(format_dict):
if isinstance(pdfdata, bytes):
# make sure we don't double-encode
if not pdfdata.startswith(PDF64):
pdfdata = encodebytes(pdfdata)
pdfdata = b2a_base64(pdfdata)
encoded['application/pdf'] = pdfdata.decode('ascii')

return encoded
Expand Down Expand Up @@ -151,9 +153,21 @@ def json_clean(obj):

if isinstance(obj, atomic_ok):
return obj

if isinstance(obj, bytes):
return obj.decode(DEFAULT_ENCODING, 'replace')
if py3compat.PY3:
# unambiguous binary data is base64-encoded
# (this probably should have happened upstream)
return b2a_base64(obj).decode('ascii')
else:
# Python 2 bytestr is ambiguous,
# needs special handling for possible binary bytestrings.
# imperfect workaround: if ascii, assume text.
# otherwise assume binary, base64-encode (py3 behavior).
try:
return obj.decode('ascii')
except UnicodeDecodeError:
return b2a_base64(obj).decode('ascii')

if isinstance(obj, container_to_list) or (
hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
Expand Down
31 changes: 14 additions & 17 deletions ipykernel/tests/test_jsonutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,8 @@
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

from binascii import a2b_base64
import json
import sys

if sys.version_info < (3,):
from base64 import decodestring as decodebytes
else:
from base64 import decodebytes

from datetime import datetime
import numbers
Expand All @@ -19,7 +14,7 @@

from .. import jsonutil
from ..jsonutil import json_clean, encode_images
from ipython_genutils.py3compat import unicode_to_str, str_to_bytes, iteritems
from ipython_genutils.py3compat import unicode_to_str

class MyInt(object):
def __int__(self):
Expand Down Expand Up @@ -70,28 +65,30 @@ def test_encode_images():
pngdata = b'\x89PNG\r\n\x1a\nblahblahnotactuallyvalidIEND\xaeB`\x82'
jpegdata = b'\xff\xd8\xff\xe0\x00\x10JFIFblahblahjpeg(\xa0\x0f\xff\xd9'
pdfdata = b'%PDF-1.\ntrailer<</Root<</Pages<</Kids[<</MediaBox[0 0 3 3]>>]>>>>>>'
bindata = b'\xff\xff\xff\xff'

fmt = {
'image/png' : pngdata,
'image/jpeg' : jpegdata,
'application/pdf' : pdfdata
'application/pdf' : pdfdata,
'application/unrecognized': bindata,
}
encoded = encode_images(fmt)
for key, value in iteritems(fmt):
encoded = json_clean(encode_images(fmt))
for key, value in fmt.items():
# encoded has unicode, want bytes
decoded = decodebytes(encoded[key].encode('ascii'))
decoded = a2b_base64(encoded[key])
nt.assert_equal(decoded, value)
encoded2 = encode_images(encoded)
encoded2 = json_clean(encode_images(encoded))
nt.assert_equal(encoded, encoded2)

# test that we don't double-encode base64 str
b64_str = {}
for key, encoded in iteritems(encoded):
for key, encoded in encoded.items():
b64_str[key] = unicode_to_str(encoded)
encoded3 = encode_images(b64_str)
encoded3 = json_clean(encode_images(b64_str))
nt.assert_equal(encoded3, b64_str)
for key, value in iteritems(fmt):
# encoded3 has str, want bytes
decoded = decodebytes(str_to_bytes(encoded3[key]))
for key, value in fmt.items():
decoded = a2b_base64(encoded3[key])
nt.assert_equal(decoded, value)

def test_lambda():
Expand Down

0 comments on commit 4717ca5

Please sign in to comment.