Skip to content

Commit 20b8418

Browse files
committed
Fix conversion of PyUnicodeObject to wstring
On Windows, with Python >= 3.3, PyObject_Length cannot be used to get the size of the wchar_t string: it returns the number of *code points*, but characters outside the BMP each take two UTF-16 *code units* (a surrogate pair), so that count can be smaller than the number of wchar_t elements actually needed. This is not a problem on Unix, where wchar_t is 32 bits wide. This commit also fixes a problem where test_builtin_converters.py was not being run, because the module docstring was not the first statement in the file (and so was not treated as the module docstring).
1 parent 36bbdde commit 20b8418

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

src/converter/builtin_converters.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,17 @@ namespace
430430
// Remember that this will be used to construct the result object
431431
static std::wstring extract(PyObject* intermediate)
432432
{
433+
#if defined(_WIN32) && PY_VERSION_HEX >= 0x03030000
434+
BOOST_STATIC_ASSERT(sizeof(wchar_t) == 2);
435+
436+
Py_ssize_t size = 0;
437+
wchar_t *buf = PyUnicode_AsWideCharString(intermediate, &size);
438+
if (buf == NULL) {
439+
boost::python::throw_error_already_set();
440+
}
441+
std::wstring result(buf, size);
442+
PyMem_Free(buf);
443+
#else
433444
std::wstring result(::PyObject_Length(intermediate), L' ');
434445
if (!result.empty())
435446
{
@@ -444,6 +455,7 @@ namespace
444455
if (err == -1)
445456
throw_error_already_set();
446457
}
458+
#endif
447459
return result;
448460
}
449461
static PyTypeObject const* get_pytype() { return &PyUnicode_Type;}

test/test_builtin_converters.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
# Copyright David Abrahams 2004. Distributed under the Boost
22
# Software License, Version 1.0. (See accompanying
33
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4-
import sys
5-
if (sys.version_info.major >= 3):
6-
long = int
74
r"""
85
>>> from builtin_converters_ext import *
96
@@ -136,6 +133,9 @@
136133
>>> print(rewrap_value_wstring(u'yo, wassup?'))
137134
yo, wassup?
138135
136+
>>> print(rewrap_value_wstring(u'\U0001f4a9'))
137+
\U0001f4a9
138+
139139
test that overloading on unicode works:
140140
141141
>>> print(rewrap_value_string(u'yo, wassup?'))
@@ -283,6 +283,10 @@
283283
>>> assert return_null_handle() is None
284284
"""
285285

286+
import sys
287+
if (sys.version_info.major >= 3):
288+
long = int
289+
286290
def run(args = None):
287291
import sys
288292
import doctest

0 commit comments

Comments
 (0)