Skip to content

Commit 4e0b96f

Browse files
tadeu, stefanseefeld
authored and committed
Fix conversion of PyUnicodeObject to wstring (#93)
1 parent 7178a70 commit 4e0b96f

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

src/converter/builtin_converters.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,22 @@ namespace
430430
// Remember that this will be used to construct the result object
431431
static std::wstring extract(PyObject* intermediate)
432432
{
433+
// On Windows, with Python >= 3.3, PyObject_Length cannot be used to get
434+
// the size of the wchar_t string, because it will count the number of
435+
// *code points*, but some characters not on the BMP will use two UTF-16
436+
// *code units* (surrogate pairs).
437+
// This is not a problem on Unix, since wchar_t is 32-bit.
438+
#if defined(_WIN32) && PY_VERSION_HEX >= 0x03030000
439+
BOOST_STATIC_ASSERT(sizeof(wchar_t) == 2);
440+
441+
Py_ssize_t size = 0;
442+
wchar_t *buf = PyUnicode_AsWideCharString(intermediate, &size);
443+
if (buf == NULL) {
444+
boost::python::throw_error_already_set();
445+
}
446+
std::wstring result(buf, size);
447+
PyMem_Free(buf);
448+
#else
433449
std::wstring result(::PyObject_Length(intermediate), L' ');
434450
if (!result.empty())
435451
{
@@ -444,6 +460,7 @@ namespace
444460
if (err == -1)
445461
throw_error_already_set();
446462
}
463+
#endif
447464
return result;
448465
}
449466
static PyTypeObject const* get_pytype() { return &PyUnicode_Type;}

test/test_builtin_converters.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@
133133
>>> print(rewrap_value_wstring(u'yo, wassup?'))
134134
yo, wassup?
135135
136+
>>> print(rewrap_value_wstring(u'\U0001f4a9'))
137+
\U0001f4a9
138+
136139
test that overloading on unicode works:
137140
138141
>>> print(rewrap_value_string(u'yo, wassup?'))

0 commit comments

Comments
 (0)