Skip to content

Commit cc16423

Browse files
Issue #28295: Fixed the documentation and added tests for PyUnicode_AsUCS4().
Original patch by Xiang Zhang.
1 parent 63b5b6f commit cc16423

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

Doc/c-api/unicode.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ APIs:
641641
642642
Copy the string *u* into a UCS4 buffer, including a null character, if
643643
*copy_null* is set. Returns *NULL* and sets an exception on error (in
644-
particular, a :exc:`ValueError` if *buflen* is smaller than the length of
644+
particular, a :exc:`SystemError` if *buflen* is smaller than the length of
645645
*u*). *buffer* is returned on success.
646646
647647
.. versionadded:: 3.3

Include/unicodeobject.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
749749
#endif
750750

751751
/* Copy the string into a UCS4 buffer including the null character if copy_null
752-
is set. Return NULL and raise an exception on error. Raise a ValueError if
752+
is set. Return NULL and raise an exception on error. Raise a SystemError if
753753
the buffer is smaller than the string. Return buffer on success.
754754
755755
buflen is the length of the buffer in (Py_UCS4) characters. */

Lib/test/test_unicode.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2687,6 +2687,23 @@ def test_aswidecharstring(self):
26872687
self.assertEqual(size, nchar)
26882688
self.assertEqual(wchar, nonbmp + '\0')
26892689

2690+
# Test PyUnicode_AsUCS4()
2691+
@support.cpython_only
def test_asucs4(self):
    from _testcapi import unicode_asucs4

    # Samples cover ASCII, Latin-1, BMP, astral and lone-surrogate text.
    samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
               'a\ud800b\udfffc', '\ud834\udd1e')
    for text in samples:
        size = len(text)

        # Each row is (length argument, copy_null flag, expected result).
        # The helper returns the whole scratch buffer, so the trailing
        # character shows whether a null or the '\uffff' marker ended up
        # in the last slot.
        successes = (
            (size, 1, text + '\0'),
            (size, 0, text + '\uffff'),
            (size + 1, 1, text + '\0\uffff'),
            (size + 1, 0, text + '\0\uffff'),
        )
        for length, copy_null, expected in successes:
            self.assertEqual(unicode_asucs4(text, length, copy_null),
                             expected)

        # A buffer that is too small must be reported as SystemError.
        for length, copy_null in ((size - 1, 1), (size - 2, 0)):
            self.assertRaises(SystemError, unicode_asucs4,
                              text, length, copy_null)

        # An embedded null character must not cut the copy short.
        doubled = '\0'.join([text, text])
        total = len(doubled)
        self.assertEqual(unicode_asucs4(doubled, total, 1), doubled + '\0')
        self.assertEqual(unicode_asucs4(doubled, total, 0),
                         doubled + '\uffff')
2706+
26902707
@support.cpython_only
26912708
def test_encode_decimal(self):
26922709
from _testcapi import unicode_encodedecimal

Modules/_testcapimodule.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1829,6 +1829,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
18291829
return Py_BuildValue("(Nn)", result, size);
18301830
}
18311831

1832+
/* Test wrapper around PyUnicode_AsUCS4().
 *
 * Python-level arguments: (unicode, str_len, copy_null).
 *
 * A scratch buffer of str_len + 1 Py_UCS4 slots is allocated, zero-filled,
 * and its last slot is set to the marker 0xffff.  PyUnicode_AsUCS4() is then
 * called with that buffer and a buffer length of str_len + 1, and the whole
 * buffer (all str_len + 1 code points) is returned as a new str, so the
 * caller can see whether the last slot was overwritten.
 *
 * Returns NULL with an exception set if argument parsing fails, if str_len
 * is out of range (ValueError), if allocation fails (MemoryError), or if
 * PyUnicode_AsUCS4() itself fails.
 */
static PyObject *
unicode_asucs4(PyObject *self, PyObject *args)
{
    PyObject *unicode, *result;
    Py_UCS4 *buffer;
    int copy_null;
    Py_ssize_t str_len, buf_len;

    if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) {
        return NULL;
    }

    /* str_len comes straight from the caller.  A negative value would make
       buffer[str_len] below write outside the allocation, and PY_SSIZE_T_MAX
       would overflow the str_len + 1 computation. */
    if (str_len < 0 || str_len > PY_SSIZE_T_MAX - 1) {
        PyErr_SetString(PyExc_ValueError,
                        "str_len must be non-negative and less than "
                        "PY_SSIZE_T_MAX");
        return NULL;
    }

    buf_len = str_len + 1;
    buffer = PyMem_NEW(Py_UCS4, buf_len);
    if (buffer == NULL) {
        return PyErr_NoMemory();
    }
    memset(buffer, 0, sizeof(Py_UCS4)*buf_len);
    /* Marker in the final slot: it survives only if PyUnicode_AsUCS4()
       does not write to that position. */
    buffer[str_len] = 0xffffU;

    if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
        PyMem_FREE(buffer);
        return NULL;
    }

    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
    PyMem_FREE(buffer);
    return result;
}
1861+
18321862
static PyObject *
18331863
unicode_encodedecimal(PyObject *self, PyObject *args)
18341864
{
@@ -3884,6 +3914,7 @@ static PyMethodDef TestMethods[] = {
38843914
{"test_widechar", (PyCFunction)test_widechar, METH_NOARGS},
38853915
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
38863916
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
3917+
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
38873918
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
38883919
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
38893920
{"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},

0 commit comments

Comments
 (0)