Skip to content

Commit

Permalink
bpo-36346: Make using the legacy Unicode C API optional (GH-21437)
Browse files Browse the repository at this point in the history
Add compile-time option USE_UNICODE_WCHAR_CACHE. Setting it to 0
makes the interpreter not use the wchar_t cache and the legacy Unicode C API.
  • Loading branch information
serhiy-storchaka authored Jul 10, 2020
1 parent 9650fe0 commit 4c8f09d
Show file tree
Hide file tree
Showing 17 changed files with 360 additions and 99 deletions.
4 changes: 3 additions & 1 deletion Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

/* --- Internal Unicode Operations ---------------------------------------- */

#define USE_UNICODE_WCHAR_CACHE 1
#ifndef USE_UNICODE_WCHAR_CACHE
# define USE_UNICODE_WCHAR_CACHE 1
#endif /* USE_UNICODE_WCHAR_CACHE */

/* Since splitting on whitespace is an important use case, and
whitespace in most situations is solely ASCII whitespace, we
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
from .testresult import get_test_runner


try:
from _testcapi import unicode_legacy_string
except ImportError:
unicode_legacy_string = None

__all__ = [
# globals
"PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast",
Expand Down Expand Up @@ -426,6 +431,9 @@ def requires_lzma(reason='requires lzma'):
lzma = None
return unittest.skipUnless(lzma, reason)

requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string,
'requires legacy Unicode C API')

is_jython = sys.platform.startswith('java')

is_android = hasattr(sys, 'getandroidapilevel')
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,9 @@ def test_writerows_errors(self):
self.assertRaises(OSError, writer.writerows, BadIterable())

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_writerows_legacy_strings(self):
import _testcapi

c = _testcapi.unicode_legacy_string('a')
with TemporaryFile("w+", newline='') as fileobj:
writer = csv.writer(fileobj)
Expand Down
5 changes: 4 additions & 1 deletion Lib/test/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
import numbers
import locale
from test.support import (run_unittest, run_doctest, is_resource_enabled,
requires_IEEE_754, requires_docstrings)
requires_IEEE_754, requires_docstrings,
requires_legacy_unicode_capi)
from test.support import (TestFailed,
run_with_locale, cpython_only)
from test.support.import_helper import import_fresh_module
Expand Down Expand Up @@ -582,6 +583,7 @@ def test_explicit_from_string(self):
self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")

@cpython_only
@requires_legacy_unicode_capi
def test_from_legacy_strings(self):
import _testcapi
Decimal = self.decimal.Decimal
Expand Down Expand Up @@ -2817,6 +2819,7 @@ def test_none_args(self):
Overflow])

@cpython_only
@requires_legacy_unicode_capi
def test_from_legacy_strings(self):
import _testcapi
c = self.decimal.Context()
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_getargs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,7 @@ def test_et_hash(self):
buf = bytearray()
self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf)

@support.requires_legacy_unicode_capi
def test_u(self):
from _testcapi import getargs_u
self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
Expand All @@ -985,6 +986,7 @@ def test_u(self):
self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
self.assertRaises(TypeError, getargs_u, None)

@support.requires_legacy_unicode_capi
def test_u_hash(self):
from _testcapi import getargs_u_hash
self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
Expand All @@ -994,6 +996,7 @@ def test_u_hash(self):
self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
self.assertRaises(TypeError, getargs_u_hash, None)

@support.requires_legacy_unicode_capi
def test_Z(self):
from _testcapi import getargs_Z
self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
Expand All @@ -1003,6 +1006,7 @@ def test_Z(self):
self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
self.assertIsNone(getargs_Z(None))

@support.requires_legacy_unicode_capi
def test_Z_hash(self):
from _testcapi import getargs_Z_hash
self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,7 @@ def test_isidentifier(self):
self.assertFalse("0".isidentifier())

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_isidentifier_legacy(self):
import _testcapi
u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
Expand Down Expand Up @@ -2350,6 +2351,7 @@ def test_getnewargs(self):
self.assertEqual(len(args), 1)

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_resize(self):
from _testcapi import getargs_u
for length in range(1, 100, 7):
Expand Down Expand Up @@ -2920,6 +2922,7 @@ def test_copycharacters(self):
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_encode_decimal(self):
from _testcapi import unicode_encodedecimal
self.assertEqual(unicode_encodedecimal('123'),
Expand All @@ -2936,6 +2939,7 @@ def test_encode_decimal(self):
unicode_encodedecimal, "123\u20ac", "replace")

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
self.assertEqual(transform_decimal('123'),
Expand Down
12 changes: 12 additions & 0 deletions Modules/_io/fileio.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,14 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
return -1;
}
#if USE_UNICODE_WCHAR_CACHE
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
widename = PyUnicode_AsUnicode(stringobj);
_Py_COMP_DIAG_POP
#else /* USE_UNICODE_WCHAR_CACHE */
widename = PyUnicode_AsWideCharString(stringobj, NULL);
#endif /* USE_UNICODE_WCHAR_CACHE */
if (widename == NULL)
return -1;
#else
Expand Down Expand Up @@ -491,6 +498,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
internal_close(self);

done:
#ifdef MS_WINDOWS
#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(widename);
#endif /* USE_UNICODE_WCHAR_CACHE */
#endif
Py_CLEAR(stringobj);
return ret;
}
Expand Down
16 changes: 15 additions & 1 deletion Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1668,6 +1668,7 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)

static volatile int x;

#if USE_UNICODE_WCHAR_CACHE
/* Ignore use of deprecated APIs */
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
Expand Down Expand Up @@ -1772,6 +1773,8 @@ test_Z_code(PyObject *self, PyObject *Py_UNUSED(ignored))
Py_DECREF(tuple);
Py_RETURN_NONE;
}
_Py_COMP_DIAG_POP
#endif /* USE_UNICODE_WCHAR_CACHE */

static PyObject *
test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
Expand Down Expand Up @@ -1824,6 +1827,10 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
return raiseTestError("test_widechar",
"PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");

#if USE_UNICODE_WCHAR_CACHE
/* Ignore use of deprecated APIs */
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
wide = PyUnicode_FromUnicode(invalid, 1);
if (wide == NULL)
PyErr_Clear();
Expand All @@ -1844,11 +1851,12 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
return raiseTestError("test_widechar",
"PyUnicode_Ready() didn't fail");
}
_Py_COMP_DIAG_POP
#endif /* USE_UNICODE_WCHAR_CACHE */
#endif

Py_RETURN_NONE;
}
_Py_COMP_DIAG_POP

static PyObject *
unicode_aswidechar(PyObject *self, PyObject *args)
Expand Down Expand Up @@ -2024,6 +2032,7 @@ unicode_copycharacters(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", to_copy, copied);
}

#if USE_UNICODE_WCHAR_CACHE
/* Ignore use of deprecated APIs */
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
Expand Down Expand Up @@ -2096,6 +2105,7 @@ unicode_legacy_string(PyObject *self, PyObject *args)
return u;
}
_Py_COMP_DIAG_POP
#endif /* USE_UNICODE_WCHAR_CACHE */

static PyObject *
getargs_w_star(PyObject *self, PyObject *args)
Expand Down Expand Up @@ -5398,8 +5408,10 @@ static PyMethodDef TestMethods[] = {
{"codec_incrementaldecoder",
(PyCFunction)codec_incrementaldecoder, METH_VARARGS},
{"test_s_code", test_s_code, METH_NOARGS},
#if USE_UNICODE_WCHAR_CACHE
{"test_u_code", test_u_code, METH_NOARGS},
{"test_Z_code", test_Z_code, METH_NOARGS},
#endif /* USE_UNICODE_WCHAR_CACHE */
{"test_widechar", test_widechar, METH_NOARGS},
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
Expand All @@ -5408,9 +5420,11 @@ static PyMethodDef TestMethods[] = {
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
#if USE_UNICODE_WCHAR_CACHE
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
{"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},
#endif /* USE_UNICODE_WCHAR_CACHE */
{"_test_thread_state", test_thread_state, METH_VARARGS},
{"_pending_threadfunc", pending_threadfunc, METH_VARARGS},
#ifdef HAVE_GETTIMEOFDAY
Expand Down
17 changes: 9 additions & 8 deletions Modules/_winapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,11 @@ create_converter('LPCVOID', '" F_POINTER "')
create_converter('BOOL', 'i') # F_BOOL used previously (always 'i')
create_converter('DWORD', 'k') # F_DWORD is always "k" (which is much shorter)
create_converter('LPCTSTR', 's')
create_converter('LPCWSTR', 'u')
create_converter('LPWSTR', 'u')
create_converter('UINT', 'I') # F_UINT used previously (always 'I')
class LPCWSTR_converter(Py_UNICODE_converter):
type = 'LPCWSTR'
class HANDLE_return_converter(CReturnConverter):
type = 'HANDLE'
Expand Down Expand Up @@ -197,7 +198,7 @@ class LPVOID_return_converter(CReturnConverter):
data.return_conversion.append(
'return_value = HANDLE_TO_PYNUM(_return_value);\n')
[python start generated code]*/
/*[python end generated code: output=da39a3ee5e6b4b0d input=79464c61a31ae932]*/
/*[python end generated code: output=da39a3ee5e6b4b0d input=011ee0c3a2244bfe]*/

#include "clinic/_winapi.c.h"

Expand Down Expand Up @@ -520,15 +521,15 @@ _winapi_CreateFileMapping_impl(PyObject *module, HANDLE file_handle,
/*[clinic input]
_winapi.CreateJunction
src_path: LPWSTR
dst_path: LPWSTR
src_path: LPCWSTR
dst_path: LPCWSTR
/
[clinic start generated code]*/

static PyObject *
_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path,
LPWSTR dst_path)
/*[clinic end generated code: output=66b7eb746e1dfa25 input=8cd1f9964b6e3d36]*/
_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path,
LPCWSTR dst_path)
/*[clinic end generated code: output=44b3f5e9bbcc4271 input=963d29b44b9384a7]*/
{
/* Privilege adjustment */
HANDLE token = NULL;
Expand Down
64 changes: 53 additions & 11 deletions Modules/clinic/_winapi.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4c8f09d

Please sign in to comment.