Skip to content

Commit a53ada5

Browse files
committed
Use the correct force_utf8 function based on Python version.
Import the experimental branch version of force_utf8 wholesale, adding a _py2/_py3 suffix to each implementation, and expose the correct one as force_utf8 depending on PY2. Also force InputException messages to a native string, as is done in experimental (also taken directly from that branch); this ensures that the exception message, which may be unicode, becomes a string everywhere.
1 parent 0750457 commit a53ada5

File tree

3 files changed

+89
-3
lines changed

3 files changed

+89
-3
lines changed

mig/shared/base.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import re
3737

3838
# IMPORTANT: do not import any other MiG modules here - to avoid import loops
39+
from mig.shared.compat import PY2
3940
from mig.shared.defaults import default_str_coding, default_fs_coding, \
4041
keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \
4142
_user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \
@@ -496,7 +497,7 @@ def is_unicode(val):
496497
return (type(u"") == type(val))
497498

498499

499-
def force_utf8(val, highlight=''):
500+
def _force_utf8_py2(val, highlight=''):
500501
"""Internal helper to encode unicode strings to utf8 version. Actual
501502
changes are marked out with the highlight string if given.
502503
"""
@@ -507,6 +508,31 @@ def force_utf8(val, highlight=''):
507508
return val
508509
return "%s%s%s" % (highlight, val.encode("utf8"), highlight)
509510

511+
def _force_utf8_py3(val, highlight='', stringify=True):
512+
"""Internal helper to encode unicode strings to utf8 version. Actual
513+
changes are marked out with the highlight string if given.
514+
The optional stringify turns ALL values including numbers into string.
515+
"""
516+
# We run into all kind of nasty encoding problems if we mix
517+
if not isinstance(val, basestring):
518+
if stringify:
519+
val = "%s" % val
520+
else:
521+
return val
522+
if not is_unicode(val):
523+
return val
524+
if is_unicode(highlight):
525+
hl_utf = highlight.encode("utf8")
526+
else:
527+
hl_utf = highlight
528+
return (b"%s%s%s" % (hl_utf, val.encode("utf8"), hl_utf))
529+
530+
531+
if PY2:
532+
force_utf8 = _force_utf8_py2
533+
else:
534+
force_utf8 = _force_utf8_py3
535+
510536

511537
def force_utf8_rec(input_obj, highlight=''):
512538
"""Recursive object conversion from unicode to utf8: useful to convert e.g.

mig/shared/safeinput.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
from html import escape as escape_html
5959
assert escape_html is not None
6060

61-
from mig.shared.base import force_unicode, force_utf8
61+
from mig.shared.base import force_unicode, force_native_str
6262
from mig.shared.defaults import src_dst_sep, username_charset, \
6363
username_max_length, session_id_charset, session_id_length, \
6464
subject_id_charset, subject_id_min_length, subject_id_max_length, \
@@ -2294,7 +2294,7 @@ def __init__(self, value):
22942294
def __str__(self):
22952295
"""Return string representation"""
22962296

2297-
return force_utf8(force_unicode(self.value))
2297+
return force_native_str(self.value)
22982298

22992299

23002300
def main(_exit=sys.exit, _print=print):

tests/test_mig_shared_base.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# --- BEGIN_HEADER ---
4+
#
5+
# test_mig_shared_base - unit test of the corresponding mig shared module
6+
# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH
7+
#
8+
# This file is part of MiG.
9+
#
10+
# MiG is free software: you can redistribute it and/or modify
11+
# it under the terms of the GNU General Public License as published by
12+
# the Free Software Foundation; either version 2 of the License, or
13+
# (at your option) any later version.
14+
#
15+
# MiG is distributed in the hope that it will be useful,
16+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
# GNU General Public License for more details.
19+
#
20+
# You should have received a copy of the GNU General Public License
21+
# along with this program; if not, write to the Free Software
22+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
23+
# USA.
24+
#
25+
# --- END_HEADER ---
26+
#
27+
28+
"""Unit test base functions"""
29+
30+
import binascii
31+
import codecs
32+
import os
33+
import sys
34+
35+
from tests.support import PY2, MigTestCase, testmain
36+
37+
from mig.shared.base import force_utf8
38+
39+
DUMMY_STRING = "foo bÆr baz"
40+
DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'
41+
42+
43+
class MigSharedBase(MigTestCase):
44+
"""Unit tests of functions within the mig.shared.base module."""
45+
46+
def test_force_utf8_on_string(self):
47+
actual = force_utf8(DUMMY_STRING)
48+
49+
self.assertIsInstance(actual, bytes)
50+
self.assertEqual(binascii.hexlify(actual), b'666f6f2062c386722062617a')
51+
52+
def test_force_utf8_on_unicode(self):
53+
actual = force_utf8(DUMMY_UNICODE)
54+
55+
self.assertIsInstance(actual, bytes)
56+
self.assertEqual(actual, codecs.encode(DUMMY_UNICODE, 'utf8'))
57+
58+
59+
if __name__ == '__main__':
60+
testmain()

0 commit comments

Comments
 (0)