Skip to content

Commit 4c8f09d

Browse files
bpo-36346: Make using the legacy Unicode C API optional (GH-21437)
Add the compile-time option USE_UNICODE_WCHAR_CACHE. Setting it to 0 makes the interpreter not use the wchar_t cache and the legacy Unicode C API.
1 parent 9650fe0 commit 4c8f09d

17 files changed

+360
-99
lines changed

Include/cpython/unicodeobject.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
/* --- Internal Unicode Operations ---------------------------------------- */
1313

14-
#define USE_UNICODE_WCHAR_CACHE 1
14+
#ifndef USE_UNICODE_WCHAR_CACHE
15+
# define USE_UNICODE_WCHAR_CACHE 1
16+
#endif /* USE_UNICODE_WCHAR_CACHE */
1517

1618
/* Since splitting on whitespace is an important use case, and
1719
whitespace in most situations is solely ASCII whitespace, we

Lib/test/support/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@
3636
from .testresult import get_test_runner
3737

3838

39+
try:
40+
from _testcapi import unicode_legacy_string
41+
except ImportError:
42+
unicode_legacy_string = None
43+
3944
__all__ = [
4045
# globals
4146
"PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast",
@@ -426,6 +431,9 @@ def requires_lzma(reason='requires lzma'):
426431
lzma = None
427432
return unittest.skipUnless(lzma, reason)
428433

434+
requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string,
435+
'requires legacy Unicode C API')
436+
429437
is_jython = sys.platform.startswith('java')
430438

431439
is_android = hasattr(sys, 'getandroidapilevel')

Lib/test/test_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,9 @@ def test_writerows_errors(self):
250250
self.assertRaises(OSError, writer.writerows, BadIterable())
251251

252252
@support.cpython_only
253+
@support.requires_legacy_unicode_capi
253254
def test_writerows_legacy_strings(self):
254255
import _testcapi
255-
256256
c = _testcapi.unicode_legacy_string('a')
257257
with TemporaryFile("w+", newline='') as fileobj:
258258
writer = csv.writer(fileobj)

Lib/test/test_decimal.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333
import numbers
3434
import locale
3535
from test.support import (run_unittest, run_doctest, is_resource_enabled,
36-
requires_IEEE_754, requires_docstrings)
36+
requires_IEEE_754, requires_docstrings,
37+
requires_legacy_unicode_capi)
3738
from test.support import (TestFailed,
3839
run_with_locale, cpython_only)
3940
from test.support.import_helper import import_fresh_module
@@ -582,6 +583,7 @@ def test_explicit_from_string(self):
582583
self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")
583584

584585
@cpython_only
586+
@requires_legacy_unicode_capi
585587
def test_from_legacy_strings(self):
586588
import _testcapi
587589
Decimal = self.decimal.Decimal
@@ -2817,6 +2819,7 @@ def test_none_args(self):
28172819
Overflow])
28182820

28192821
@cpython_only
2822+
@requires_legacy_unicode_capi
28202823
def test_from_legacy_strings(self):
28212824
import _testcapi
28222825
c = self.decimal.Context()

Lib/test/test_getargs2.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -976,6 +976,7 @@ def test_et_hash(self):
976976
buf = bytearray()
977977
self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf)
978978

979+
@support.requires_legacy_unicode_capi
979980
def test_u(self):
980981
from _testcapi import getargs_u
981982
self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
@@ -985,6 +986,7 @@ def test_u(self):
985986
self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
986987
self.assertRaises(TypeError, getargs_u, None)
987988

989+
@support.requires_legacy_unicode_capi
988990
def test_u_hash(self):
989991
from _testcapi import getargs_u_hash
990992
self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
@@ -994,6 +996,7 @@ def test_u_hash(self):
994996
self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
995997
self.assertRaises(TypeError, getargs_u_hash, None)
996998

999+
@support.requires_legacy_unicode_capi
9971000
def test_Z(self):
9981001
from _testcapi import getargs_Z
9991002
self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
@@ -1003,6 +1006,7 @@ def test_Z(self):
10031006
self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
10041007
self.assertIsNone(getargs_Z(None))
10051008

1009+
@support.requires_legacy_unicode_capi
10061010
def test_Z_hash(self):
10071011
from _testcapi import getargs_Z_hash
10081012
self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')

Lib/test/test_unicode.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,7 @@ def test_isidentifier(self):
723723
self.assertFalse("0".isidentifier())
724724

725725
@support.cpython_only
726+
@support.requires_legacy_unicode_capi
726727
def test_isidentifier_legacy(self):
727728
import _testcapi
728729
u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
@@ -2350,6 +2351,7 @@ def test_getnewargs(self):
23502351
self.assertEqual(len(args), 1)
23512352

23522353
@support.cpython_only
2354+
@support.requires_legacy_unicode_capi
23532355
def test_resize(self):
23542356
from _testcapi import getargs_u
23552357
for length in range(1, 100, 7):
@@ -2920,6 +2922,7 @@ def test_copycharacters(self):
29202922
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
29212923

29222924
@support.cpython_only
2925+
@support.requires_legacy_unicode_capi
29232926
def test_encode_decimal(self):
29242927
from _testcapi import unicode_encodedecimal
29252928
self.assertEqual(unicode_encodedecimal('123'),
@@ -2936,6 +2939,7 @@ def test_encode_decimal(self):
29362939
unicode_encodedecimal, "123\u20ac", "replace")
29372940

29382941
@support.cpython_only
2942+
@support.requires_legacy_unicode_capi
29392943
def test_transform_decimal(self):
29402944
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
29412945
self.assertEqual(transform_decimal('123'),

Modules/_io/fileio.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,14 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
270270
if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
271271
return -1;
272272
}
273+
#if USE_UNICODE_WCHAR_CACHE
274+
_Py_COMP_DIAG_PUSH
275+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
273276
widename = PyUnicode_AsUnicode(stringobj);
277+
_Py_COMP_DIAG_POP
278+
#else /* USE_UNICODE_WCHAR_CACHE */
279+
widename = PyUnicode_AsWideCharString(stringobj, NULL);
280+
#endif /* USE_UNICODE_WCHAR_CACHE */
274281
if (widename == NULL)
275282
return -1;
276283
#else
@@ -491,6 +498,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
491498
internal_close(self);
492499

493500
done:
501+
#ifdef MS_WINDOWS
502+
#if !USE_UNICODE_WCHAR_CACHE
503+
PyMem_Free(widename);
504+
#endif /* USE_UNICODE_WCHAR_CACHE */
505+
#endif
494506
Py_CLEAR(stringobj);
495507
return ret;
496508
}

Modules/_testcapimodule.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1668,6 +1668,7 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)
16681668

16691669
static volatile int x;
16701670

1671+
#if USE_UNICODE_WCHAR_CACHE
16711672
/* Ignore use of deprecated APIs */
16721673
_Py_COMP_DIAG_PUSH
16731674
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
@@ -1772,6 +1773,8 @@ test_Z_code(PyObject *self, PyObject *Py_UNUSED(ignored))
17721773
Py_DECREF(tuple);
17731774
Py_RETURN_NONE;
17741775
}
1776+
_Py_COMP_DIAG_POP
1777+
#endif /* USE_UNICODE_WCHAR_CACHE */
17751778

17761779
static PyObject *
17771780
test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
@@ -1824,6 +1827,10 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
18241827
return raiseTestError("test_widechar",
18251828
"PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail");
18261829

1830+
#if USE_UNICODE_WCHAR_CACHE
1831+
/* Ignore use of deprecated APIs */
1832+
_Py_COMP_DIAG_PUSH
1833+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
18271834
wide = PyUnicode_FromUnicode(invalid, 1);
18281835
if (wide == NULL)
18291836
PyErr_Clear();
@@ -1844,11 +1851,12 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
18441851
return raiseTestError("test_widechar",
18451852
"PyUnicode_Ready() didn't fail");
18461853
}
1854+
_Py_COMP_DIAG_POP
1855+
#endif /* USE_UNICODE_WCHAR_CACHE */
18471856
#endif
18481857

18491858
Py_RETURN_NONE;
18501859
}
1851-
_Py_COMP_DIAG_POP
18521860

18531861
static PyObject *
18541862
unicode_aswidechar(PyObject *self, PyObject *args)
@@ -2024,6 +2032,7 @@ unicode_copycharacters(PyObject *self, PyObject *args)
20242032
return Py_BuildValue("(Nn)", to_copy, copied);
20252033
}
20262034

2035+
#if USE_UNICODE_WCHAR_CACHE
20272036
/* Ignore use of deprecated APIs */
20282037
_Py_COMP_DIAG_PUSH
20292038
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
@@ -2096,6 +2105,7 @@ unicode_legacy_string(PyObject *self, PyObject *args)
20962105
return u;
20972106
}
20982107
_Py_COMP_DIAG_POP
2108+
#endif /* USE_UNICODE_WCHAR_CACHE */
20992109

21002110
static PyObject *
21012111
getargs_w_star(PyObject *self, PyObject *args)
@@ -5398,8 +5408,10 @@ static PyMethodDef TestMethods[] = {
53985408
{"codec_incrementaldecoder",
53995409
(PyCFunction)codec_incrementaldecoder, METH_VARARGS},
54005410
{"test_s_code", test_s_code, METH_NOARGS},
5411+
#if USE_UNICODE_WCHAR_CACHE
54015412
{"test_u_code", test_u_code, METH_NOARGS},
54025413
{"test_Z_code", test_Z_code, METH_NOARGS},
5414+
#endif /* USE_UNICODE_WCHAR_CACHE */
54035415
{"test_widechar", test_widechar, METH_NOARGS},
54045416
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
54055417
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
@@ -5408,9 +5420,11 @@ static PyMethodDef TestMethods[] = {
54085420
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
54095421
{"unicode_findchar", unicode_findchar, METH_VARARGS},
54105422
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
5423+
#if USE_UNICODE_WCHAR_CACHE
54115424
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
54125425
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
54135426
{"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},
5427+
#endif /* USE_UNICODE_WCHAR_CACHE */
54145428
{"_test_thread_state", test_thread_state, METH_VARARGS},
54155429
{"_pending_threadfunc", pending_threadfunc, METH_VARARGS},
54165430
#ifdef HAVE_GETTIMEOFDAY

Modules/_winapi.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,11 @@ create_converter('LPCVOID', '" F_POINTER "')
164164
create_converter('BOOL', 'i') # F_BOOL used previously (always 'i')
165165
create_converter('DWORD', 'k') # F_DWORD is always "k" (which is much shorter)
166166
create_converter('LPCTSTR', 's')
167-
create_converter('LPCWSTR', 'u')
168-
create_converter('LPWSTR', 'u')
169167
create_converter('UINT', 'I') # F_UINT used previously (always 'I')
170168
169+
class LPCWSTR_converter(Py_UNICODE_converter):
170+
type = 'LPCWSTR'
171+
171172
class HANDLE_return_converter(CReturnConverter):
172173
type = 'HANDLE'
173174
@@ -197,7 +198,7 @@ class LPVOID_return_converter(CReturnConverter):
197198
data.return_conversion.append(
198199
'return_value = HANDLE_TO_PYNUM(_return_value);\n')
199200
[python start generated code]*/
200-
/*[python end generated code: output=da39a3ee5e6b4b0d input=79464c61a31ae932]*/
201+
/*[python end generated code: output=da39a3ee5e6b4b0d input=011ee0c3a2244bfe]*/
201202

202203
#include "clinic/_winapi.c.h"
203204

@@ -520,15 +521,15 @@ _winapi_CreateFileMapping_impl(PyObject *module, HANDLE file_handle,
520521
/*[clinic input]
521522
_winapi.CreateJunction
522523
523-
src_path: LPWSTR
524-
dst_path: LPWSTR
524+
src_path: LPCWSTR
525+
dst_path: LPCWSTR
525526
/
526527
[clinic start generated code]*/
527528

528529
static PyObject *
529-
_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path,
530-
LPWSTR dst_path)
531-
/*[clinic end generated code: output=66b7eb746e1dfa25 input=8cd1f9964b6e3d36]*/
530+
_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path,
531+
LPCWSTR dst_path)
532+
/*[clinic end generated code: output=44b3f5e9bbcc4271 input=963d29b44b9384a7]*/
532533
{
533534
/* Privilege adjustment */
534535
HANDLE token = NULL;

Modules/clinic/_winapi.c.h

Lines changed: 53 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)