From 699e616db735dd85fe054d7251c97e2a56a6d168 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Oct 2018 07:43:36 +0300 Subject: [PATCH 01/17] bpo-36346: Prepare for removing the legacy Unicode C API. Add two compile-time options: HAVE_UNICODE_WCHAR_CACHE and USE_UNICODE_WCHAR_CACHE. Both default to 1. Setting USE_UNICODE_WCHAR_CACHE to 0 makes the interpreter stop using the wchar_t cache and the legacy Unicode C API. Setting HAVE_UNICODE_WCHAR_CACHE to 0 removes the wchar_t cache and the legacy Unicode C API that depends on it. --- Include/cpython/unicodeobject.h | 15 ++ Lib/test/clinic.test | 19 +- Lib/test/support/__init__.py | 8 + Lib/test/test_csv.py | 2 +- Lib/test/test_decimal.py | 7 +- Lib/test/test_getargs2.py | 4 + Lib/test/test_unicode.py | 2 + Modules/_codecsmodule.c | 22 +- Modules/_ctypes/_ctypes.c | 28 ++- Modules/_ctypes/callproc.c | 10 +- Modules/_ctypes/cfield.c | 26 ++- Modules/_io/fileio.c | 9 + Modules/_testcapimodule.c | 16 ++ Modules/_winapi.c | 92 ++++---- Modules/arraymodule.c | 79 ++++++- Modules/clinic/_winapi.c.h | 77 +++++-- Modules/clinic/arraymodule.c.h | 21 +- Modules/clinic/posixmodule.c.h | 57 ++++- Modules/overlapped.c | 89 ++++++-- Modules/posixmodule.c | 87 +++++-- Objects/unicodeobject.c | 394 ++++++++++++++++++++++---------- PC/_msi.c | 19 +- PC/clinic/winreg.c.h | 304 +++++++++++++++++++++--- PC/winreg.c | 75 ++++-- Python/dynload_win.c | 10 +- Python/fileutils.c | 20 +- Python/getargs.c | 7 + Python/traceback.c | 15 +- Tools/clinic/clinic.py | 59 ++++- 29 files changed, 1237 insertions(+), 336 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index c7634902248534..b01c55e85515a7 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -15,6 +15,9 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; /* --- Internal Unicode Operations ---------------------------------------- */ +#define HAVE_UNICODE_WCHAR_CACHE 1 +#define USE_UNICODE_WCHAR_CACHE 1 + /* Since splitting on whitespace is an 
important use case, and whitespace in most situations is solely ASCII whitespace, we optimize for the common case by using a quick look-up table @@ -71,6 +74,7 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; /* low surrogate = bottom 10 bits added to DC00 */ #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) +#if HAVE_UNICODE_WCHAR_CACHE /* Check if substring matches at given offset. The offset must be valid, and the substring must not be empty. */ @@ -78,6 +82,7 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Unicode Type ------------------------------------------------------- */ @@ -218,7 +223,9 @@ typedef struct { 4 bytes (see issue #19537 on m68k). */ unsigned int :24; } state; +#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr; /* wchar_t representation (null-terminated) */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the @@ -229,8 +236,10 @@ typedef struct { Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the * terminating \0. */ char *utf8; /* UTF-8 representation (null-terminated) */ +#if HAVE_UNICODE_WCHAR_CACHE Py_ssize_t wstr_length; /* Number of code points in wstr, possible * surrogates count as two code points. */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyCompactUnicodeObject; /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the @@ -247,6 +256,8 @@ typedef struct { } PyUnicodeObject; /* Fast access macros */ +#if HAVE_UNICODE_WCHAR_CACHE + #define PyUnicode_WSTR_LENGTH(op) \ (PyUnicode_IS_COMPACT_ASCII(op) ? 
\ ((PyASCIIObject*)op)->length : \ @@ -285,6 +296,7 @@ typedef struct { ((const char *)(PyUnicode_AS_UNICODE(op))) /* Py_DEPRECATED(3.3) */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ @@ -1240,6 +1252,9 @@ PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); and where the hash values are equal (i.e. a very probable match) */ PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); + #ifdef __cplusplus } #endif diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index 62c2f00c6b2b98..5cb89f87b090e6 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -1871,13 +1871,26 @@ test_Py_UNICODE_converter(PyObject *module, PyObject *const *args, Py_ssize_t na const Py_UNICODE *e; Py_ssize_clean_t e_length; - if (!_PyArg_ParseStack(args, nargs, "uuZu#Z#:test_Py_UNICODE_converter", - &a, &b, &c, &d, &d_length, &e, &e_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&O&u#Z#:test_Py_UNICODE_converter", + _PyUnicode_WideCharString_Converter, &a, _PyUnicode_WideCharString_Converter, &b, _PyUnicode_WideCharString_Opt_Converter, &c, &d, &d_length, &e, &e_length)) { goto exit; } return_value = test_Py_UNICODE_converter_impl(module, a, b, c, d, d_length, e, e_length); exit: + /* Cleanup for a */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)a); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for b */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)b); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for c */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)c); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1888,7 +1901,7 @@ test_Py_UNICODE_converter_impl(PyObject *module, const Py_UNICODE *a, Py_ssize_clean_t d_length, const Py_UNICODE *e, Py_ssize_clean_t e_length) -/*[clinic end generated code: 
output=dd0a09a1b772e57b input=064a3b68ad7f04b0]*/ +/*[clinic end generated code: output=ef45e982fedf0b3d input=064a3b68ad7f04b0]*/ /*[clinic input] diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5bd15a2feae9d7..3b395ee7715de3 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -68,6 +68,11 @@ except ImportError: resource = None +try: + from _testcapi import unicode_legacy_string +except ImportError: + unicode_legacy_string = None + __all__ = [ # globals "PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast", @@ -814,6 +819,9 @@ def dec(*args, **kwargs): requires_lzma = unittest.skipUnless(lzma, 'requires lzma') +requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string, + 'requires legacy Unicode C API') + is_jython = sys.platform.startswith('java') is_android = hasattr(sys, 'getandroidapilevel') diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 7a333139b5ea2c..ca39a17aa56732 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -231,9 +231,9 @@ def test_writerows_with_none(self): self.assertEqual(fileobj.read(), 'a\r\n""\r\n') @support.cpython_only + @support.requires_legacy_unicode_capi def test_writerows_legacy_strings(self): import _testcapi - c = _testcapi.unicode_legacy_string('a') with TemporaryFile("w+", newline='') as fileobj: writer = csv.writer(fileobj) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 1f37b5372a3e7e..df6c894aa97335 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -33,9 +33,10 @@ import numbers import locale from test.support import (run_unittest, run_doctest, is_resource_enabled, - requires_IEEE_754, requires_docstrings) + requires_IEEE_754, requires_docstrings, + requires_legacy_unicode_capi) from test.support import (import_fresh_module, TestFailed, - run_with_locale, cpython_only) + run_with_locale, cpython_only, get_attribute) import random import inspect import threading @@ 
-581,6 +582,7 @@ def test_explicit_from_string(self): self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003") @cpython_only + @requires_legacy_unicode_capi def test_from_legacy_strings(self): import _testcapi Decimal = self.decimal.Decimal @@ -2816,6 +2818,7 @@ def test_none_args(self): Overflow]) @cpython_only + @requires_legacy_unicode_capi def test_from_legacy_strings(self): import _testcapi c = self.decimal.Context() diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py index 07e2d151379182..fb451d964ebd0e 100644 --- a/Lib/test/test_getargs2.py +++ b/Lib/test/test_getargs2.py @@ -985,6 +985,7 @@ def test_et_hash(self): buf = bytearray() self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf) + @support.requires_legacy_unicode_capi def test_u(self): from _testcapi import getargs_u self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9') @@ -994,6 +995,7 @@ def test_u(self): self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview')) self.assertRaises(TypeError, getargs_u, None) + @support.requires_legacy_unicode_capi def test_u_hash(self): from _testcapi import getargs_u_hash self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9') @@ -1003,6 +1005,7 @@ def test_u_hash(self): self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview')) self.assertRaises(TypeError, getargs_u_hash, None) + @support.requires_legacy_unicode_capi def test_Z(self): from _testcapi import getargs_Z self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9') @@ -1012,6 +1015,7 @@ def test_Z(self): self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview')) self.assertIsNone(getargs_Z(None)) + @support.requires_legacy_unicode_capi def test_Z_hash(self): from _testcapi import getargs_Z_hash self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9') diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index c277e705b9f55e..7afe84c421793f 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2832,6 +2832,7 @@ def 
test_copycharacters(self): self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0) @support.cpython_only + @support.requires_legacy_unicode_capi def test_encode_decimal(self): from _testcapi import unicode_encodedecimal self.assertEqual(unicode_encodedecimal('123'), @@ -2848,6 +2849,7 @@ def test_encode_decimal(self): unicode_encodedecimal, "123\u20ac", "replace") @support.cpython_only + @support.requires_legacy_unicode_capi def test_transform_decimal(self): from _testcapi import unicode_transformdecimaltoascii as transform_decimal self.assertEqual(transform_decimal('123'), diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index e0d6902b18aa14..64fdf6c588757f 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -704,13 +704,13 @@ _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, return NULL; if (PyUnicode_Check(obj)) { - Py_UNICODE *u; Py_ssize_t len, size; if (PyUnicode_READY(obj) < 0) return NULL; - u = PyUnicode_AsUnicodeAndSize(obj, &len); +#if USE_UNICODE_WCHAR_CACHE + Py_UNICODE *u = PyUnicode_AsUnicodeAndSize(obj, &len); if (u == NULL) return NULL; if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) @@ -718,6 +718,24 @@ _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj, size = len * sizeof(Py_UNICODE); return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size), PyUnicode_GET_LENGTH(obj)); +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(obj, NULL, 0); + if (len < 0) { + return NULL; + } + assert(len > 0); + len--; + if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) { + return PyErr_NoMemory(); + } + size = len * sizeof(wchar_t); + PyObject *bytes = PyBytes_FromStringAndSize(NULL, size); + if (bytes == NULL) { + return NULL; + } + PyUnicode_AsWideChar(obj, (wchar_t *)PyBytes_AS_STRING(bytes), len); + return codec_tuple(bytes, PyUnicode_GET_LENGTH(obj)); +#endif /* USE_UNICODE_WCHAR_CACHE */ } else { Py_buffer view; diff --git 
a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 0d95d2b6f76ece..460eda2e7b44fe 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -1293,7 +1293,6 @@ static int WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored)) { Py_ssize_t result = 0; - Py_UNICODE *wstr; Py_ssize_t len; if (value == NULL) { @@ -1309,21 +1308,32 @@ WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored } else Py_INCREF(value); - wstr = PyUnicode_AsUnicodeAndSize(value, &len); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + len = PyUnicode_GetSize(value); + if (len < 0) { + Py_DECREF(value); + return -1; + } +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(value, NULL, 0); + if (len < 0) { + Py_DECREF(value); return -1; + } + assert(len > 0); + len--; +#endif /* USE_UNICODE_WCHAR_CACHE */ if ((size_t)len > self->b_size/sizeof(wchar_t)) { PyErr_SetString(PyExc_ValueError, "string too long"); - result = -1; - goto done; + Py_DECREF(value); + return -1; } result = PyUnicode_AsWideChar(value, (wchar_t *)self->b_ptr, self->b_size/sizeof(wchar_t)); if (result >= 0 && (size_t)result < self->b_size/sizeof(wchar_t)) ((wchar_t *)self->b_ptr)[result] = (wchar_t)0; - done: Py_DECREF(value); return result >= 0 ? 
0 : -1; @@ -3358,10 +3368,12 @@ _validate_paramflags(PyTypeObject *type, PyObject *paramflags) for (i = 0; i < len; ++i) { PyObject *item = PyTuple_GET_ITEM(paramflags, i); int flag; - char *name; + PyObject *name = Py_None; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i|ZO", &flag, &name, &defval)) { + if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) || + !(name == Py_None || PyUnicode_Check(name))) + { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index d91e84613b2fe1..410d4f2026ef06 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1274,7 +1274,6 @@ The handle may be used to locate exported functions in this\n\ module.\n"; static PyObject *load_library(PyObject *self, PyObject *args) { - const WCHAR *name; PyObject *nameobj; PyObject *ignored; HMODULE hMod; @@ -1282,7 +1281,11 @@ static PyObject *load_library(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "U|O:LoadLibrary", &nameobj, &ignored)) return NULL; - name = _PyUnicode_AsUnicode(nameobj); +#if USE_UNICODE_WCHAR_CACHE + const WCHAR *name = _PyUnicode_AsUnicode(nameobj); +#else /* USE_UNICODE_WCHAR_CACHE */ + WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!name) return NULL; @@ -1290,6 +1293,9 @@ static PyObject *load_library(PyObject *self, PyObject *args) hMod = LoadLibraryW(name); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(name); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!hMod) return PyErr_SetFromWindowsErr(GetLastError()); #ifdef _WIN64 diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 5f194e21550f7b..1e480c8771a716 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -1229,7 +1229,6 @@ U_get(void *ptr, Py_ssize_t size) static PyObject * U_set(void *ptr, PyObject *value, Py_ssize_t length) 
{ - Py_UNICODE *wstr; Py_ssize_t size; /* It's easier to calculate in characters than in bytes */ @@ -1242,19 +1241,23 @@ U_set(void *ptr, PyObject *value, Py_ssize_t length) return NULL; } - wstr = PyUnicode_AsUnicodeAndSize(value, &size); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + size = PyUnicode_GetSize(value); + if (size < 0) return NULL; +#else /* USE_UNICODE_WCHAR_CACHE */ + size = PyUnicode_AsWideChar(value, NULL, 0); + if (size < 0) + return NULL; + size--; +#endif /* USE_UNICODE_WCHAR_CACHE */ if (size > length) { PyErr_Format(PyExc_ValueError, "string too long (%zd, maximum length %zd)", size, length); return NULL; - } else if (size < length-1) - /* copy terminating NUL character if there is space */ - size += 1; - - if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, size) == -1) { + } + if (PyUnicode_AsWideChar(value, (wchar_t *)ptr, length) == -1) { return NULL; } @@ -1423,7 +1426,11 @@ BSTR_set(void *ptr, PyObject *value, Py_ssize_t size) if (value) { wchar_t* wvalue; Py_ssize_t wsize; +#if USE_UNICODE_WCHAR_CACHE wvalue = PyUnicode_AsUnicodeAndSize(value, &wsize); +#else /* USE_UNICODE_WCHAR_CACHE */ + wvalue = PyUnicode_AsWideCharString(value, &wsize); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wvalue == NULL) return NULL; if ((unsigned) wsize != wsize) { @@ -1431,6 +1438,9 @@ BSTR_set(void *ptr, PyObject *value, Py_ssize_t size) return NULL; } bstr = SysAllocStringLen(wvalue, (unsigned)wsize); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wvalue); +#endif /* USE_UNICODE_WCHAR_CACHE */ } else bstr = NULL; diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index c502c430134ef6..487edf436aed36 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -275,7 +275,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, if (!PyUnicode_FSDecoder(nameobj, &stringobj)) { return -1; } +#if USE_UNICODE_WCHAR_CACHE widename = PyUnicode_AsUnicode(stringobj); +#else /* USE_UNICODE_WCHAR_CACHE */ + widename = 
PyUnicode_AsWideCharString(stringobj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (widename == NULL) return -1; #else @@ -492,6 +496,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, internal_close(self); done: +#ifdef MS_WINDOWS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(widename); +#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif Py_CLEAR(stringobj); return ret; } diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 350ef771630eab..ddbc3dc80e1a19 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1410,6 +1410,7 @@ getargs_y_hash(PyObject *self, PyObject *args) return PyBytes_FromStringAndSize(str, size); } +//#if USE_UNICODE_WCHAR_CACHE static PyObject * getargs_u(PyObject *self, PyObject *args) { @@ -1453,6 +1454,7 @@ getargs_Z_hash(PyObject *self, PyObject *args) else Py_RETURN_NONE; } +// #endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_es(PyObject *self, PyObject *args) @@ -1637,6 +1639,7 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) static volatile int x; +#if USE_UNICODE_WCHAR_CACHE /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case of an error. 
*/ @@ -1737,6 +1740,7 @@ test_Z_code(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_DECREF(tuple); Py_RETURN_NONE; } +#endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) @@ -1796,6 +1800,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail"); +#if USE_UNICODE_WCHAR_CACHE wide = PyUnicode_FromUnicode(NULL, 1); if (wide == NULL) return NULL; @@ -1809,6 +1814,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_Ready() didn't fail"); } +#endif /* USE_UNICODE_WCHAR_CACHE */ #endif Py_RETURN_NONE; @@ -1946,6 +1952,7 @@ unicode_copycharacters(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", to_copy, copied); } +#if USE_UNICODE_WCHAR_CACHE static PyObject * unicode_encodedecimal(PyObject *self, PyObject *args) { @@ -2013,6 +2020,7 @@ unicode_legacy_string(PyObject *self, PyObject *args) return u; } +#endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_w_star(PyObject *self, PyObject *args) @@ -4809,10 +4817,12 @@ static PyMethodDef TestMethods[] = { {"getargs_y", getargs_y, METH_VARARGS}, {"getargs_y_star", getargs_y_star, METH_VARARGS}, {"getargs_y_hash", getargs_y_hash, METH_VARARGS}, +// #if USE_UNICODE_WCHAR_CACHE {"getargs_u", getargs_u, METH_VARARGS}, {"getargs_u_hash", getargs_u_hash, METH_VARARGS}, {"getargs_Z", getargs_Z, METH_VARARGS}, {"getargs_Z_hash", getargs_Z_hash, METH_VARARGS}, +// #endif /* USE_UNICODE_WCHAR_CACHE */ {"getargs_w_star", getargs_w_star, METH_VARARGS}, {"getargs_es", getargs_es, METH_VARARGS}, {"getargs_et", getargs_et, METH_VARARGS}, @@ -4823,17 +4833,23 @@ static PyMethodDef TestMethods[] = { {"codec_incrementaldecoder", (PyCFunction)codec_incrementaldecoder, METH_VARARGS}, {"test_s_code", test_s_code, METH_NOARGS}, +#if USE_UNICODE_WCHAR_CACHE {"test_u_code", test_u_code, METH_NOARGS}, 
{"test_Z_code", test_Z_code, METH_NOARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ {"test_widechar", test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, +#if USE_UNICODE_WCHAR_CACHE {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ +#if USE_UNICODE_WCHAR_CACHE {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ {"_test_thread_state", test_thread_state, METH_VARARGS}, {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, #ifdef HAVE_GETTIMEOFDAY diff --git a/Modules/_winapi.c b/Modules/_winapi.c index e7b221d888ef8d..28000c8dc80e06 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -164,10 +164,11 @@ create_converter('LPCVOID', '" F_POINTER "') create_converter('BOOL', 'i') # F_BOOL used previously (always 'i') create_converter('DWORD', 'k') # F_DWORD is always "k" (which is much shorter) create_converter('LPCTSTR', 's') -create_converter('LPCWSTR', 'u') -create_converter('LPWSTR', 'u') create_converter('UINT', 'I') # F_UINT used previously (always 'I') +class LPCWSTR_converter(Py_UNICODE_converter): + type = 'LPCWSTR' + class HANDLE_return_converter(CReturnConverter): type = 'HANDLE' @@ -197,7 +198,7 @@ class LPVOID_return_converter(CReturnConverter): data.return_conversion.append( 'return_value = HANDLE_TO_PYNUM(_return_value);\n') [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=79464c61a31ae932]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=011ee0c3a2244bfe]*/ #include "clinic/_winapi.c.h" @@ -512,15 +513,15 @@ 
_winapi_CreateFileMapping_impl(PyObject *module, HANDLE file_handle, /*[clinic input] _winapi.CreateJunction - src_path: LPWSTR - dst_path: LPWSTR + src_path: LPCWSTR + dst_path: LPCWSTR / [clinic start generated code]*/ static PyObject * -_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path, - LPWSTR dst_path) -/*[clinic end generated code: output=66b7eb746e1dfa25 input=8cd1f9964b6e3d36]*/ +_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, + LPCWSTR dst_path) +/*[clinic end generated code: output=44b3f5e9bbcc4271 input=963d29b44b9384a7]*/ { /* Privilege adjustment */ HANDLE token = NULL; @@ -752,12 +753,12 @@ gethandle(PyObject* obj, const char* name) return ret; } -static PyObject* +static wchar_t * getenvironment(PyObject* environment) { Py_ssize_t i, envsize, totalsize; - Py_UCS4 *buffer = NULL, *p, *end; - PyObject *keys, *values, *res; + wchar_t *buffer = NULL, *p, *end; + PyObject *keys, *values; /* convert environment dictionary to windows environment string */ if (! PyMapping_Check(environment)) { @@ -775,8 +776,8 @@ getenvironment(PyObject* environment) goto error; } - envsize = PySequence_Fast_GET_SIZE(keys); - if (PySequence_Fast_GET_SIZE(values) != envsize) { + envsize = PyList_GET_SIZE(keys); + if (PyList_GET_SIZE(values) != envsize) { PyErr_SetString(PyExc_RuntimeError, "environment changed size during iteration"); goto error; @@ -784,8 +785,9 @@ getenvironment(PyObject* environment) totalsize = 1; /* trailing null character */ for (i = 0; i < envsize; i++) { - PyObject* key = PySequence_Fast_GET_ITEM(keys, i); - PyObject* value = PySequence_Fast_GET_ITEM(values, i); + PyObject* key = PyList_GET_ITEM(keys, i); + PyObject* value = PyList_GET_ITEM(values, i); + Py_ssize_t size; if (! PyUnicode_Check(key) || ! 
PyUnicode_Check(value)) { PyErr_SetString(PyExc_TypeError, @@ -806,19 +808,25 @@ getenvironment(PyObject* environment) PyErr_SetString(PyExc_ValueError, "illegal environment variable name"); goto error; } - if (totalsize > PY_SSIZE_T_MAX - PyUnicode_GET_LENGTH(key) - 1) { + + size = PyUnicode_AsWideChar(key, NULL, 0); + assert(size > 1); + if (totalsize > PY_SSIZE_T_MAX - size) { PyErr_SetString(PyExc_OverflowError, "environment too long"); goto error; } - totalsize += PyUnicode_GET_LENGTH(key) + 1; /* +1 for '=' */ - if (totalsize > PY_SSIZE_T_MAX - PyUnicode_GET_LENGTH(value) - 1) { + totalsize += size; /* including '=' */ + + size = PyUnicode_AsWideChar(value, NULL, 0); + assert(size > 0); + if (totalsize > PY_SSIZE_T_MAX - size) { PyErr_SetString(PyExc_OverflowError, "environment too long"); goto error; } - totalsize += PyUnicode_GET_LENGTH(value) + 1; /* +1 for '\0' */ + totalsize += size; /* including trailing '\0' */ } - buffer = PyMem_NEW(Py_UCS4, totalsize); + buffer = PyMem_NEW(wchar_t, totalsize); if (! 
buffer) { PyErr_NoMemory(); goto error; @@ -827,31 +835,27 @@ getenvironment(PyObject* environment) end = buffer + totalsize; for (i = 0; i < envsize; i++) { - PyObject* key = PySequence_Fast_GET_ITEM(keys, i); - PyObject* value = PySequence_Fast_GET_ITEM(values, i); - if (!PyUnicode_AsUCS4(key, p, end - p, 0)) - goto error; - p += PyUnicode_GET_LENGTH(key); - *p++ = '='; - if (!PyUnicode_AsUCS4(value, p, end - p, 0)) - goto error; - p += PyUnicode_GET_LENGTH(value); - *p++ = '\0'; + PyObject* key = PyList_GET_ITEM(keys, i); + PyObject* value = PyList_GET_ITEM(values, i); + Py_ssize_t size = PyUnicode_AsWideChar(key, p, end - p); + assert(1 <= size && size < end - p); + p += size; + *p++ = L'='; + size = PyUnicode_AsWideChar(value, p, end - p); + assert(0 <= size && size < end - p); + p += size + 1; } - /* add trailing null byte */ - *p++ = '\0'; + /* add trailing null character */ + *p++ = L'\0'; assert(p == end); Py_XDECREF(keys); Py_XDECREF(values); - res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, p - buffer); - PyMem_Free(buffer); - return res; + return buffer; error: - PyMem_Free(buffer); Py_XDECREF(keys); Py_XDECREF(values); return NULL; @@ -1053,8 +1057,7 @@ _winapi_CreateProcess_impl(PyObject *module, BOOL result; PROCESS_INFORMATION pi; STARTUPINFOEXW si; - PyObject *environment = NULL; - wchar_t *wenvironment; + wchar_t *wenvironment = NULL; wchar_t *command_line_copy = NULL; AttributeList attribute_list = {0}; @@ -1071,20 +1074,11 @@ _winapi_CreateProcess_impl(PyObject *module, goto cleanup; if (env_mapping != Py_None) { - environment = getenvironment(env_mapping); - if (environment == NULL) { - goto cleanup; - } - /* contains embedded null characters */ - wenvironment = PyUnicode_AsUnicode(environment); + wenvironment = getenvironment(env_mapping); if (wenvironment == NULL) { goto cleanup; } } - else { - environment = NULL; - wenvironment = NULL; - } if (getattributelist(startup_info, "lpAttributeList", &attribute_list) < 0) goto cleanup; 
@@ -1131,7 +1125,7 @@ _winapi_CreateProcess_impl(PyObject *module, cleanup: PyMem_Free(command_line_copy); - Py_XDECREF(environment); + PyMem_Free(wenvironment); freeattributelist(&attribute_list); return ret; diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index a5ba27cb36e218..d139dfbaabdaaf 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -243,6 +243,7 @@ u_getitem(arrayobject *ap, Py_ssize_t i) static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { +#if USE_UNICODE_WCHAR_CACHE Py_UNICODE *p; Py_ssize_t len; @@ -256,6 +257,24 @@ u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) if (i >= 0) ((Py_UNICODE *)ap->ob_item)[i] = p[0]; return 0; +#else + PyObject *u; + Py_ssize_t len; + + if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) + return -1; + len = PyUnicode_AsWideChar(u, NULL, 0); + if (len != 2) { + if (len >= 0) { + PyErr_SetString(PyExc_TypeError, + "array item must be unicode character"); + } + return -1; + } + if (i >= 0) + PyUnicode_AsWideChar(u, &((wchar_t *)ap->ob_item)[i], 1); + return 0; +#endif /* USE_UNICODE_WCHAR_CACHE */ } @@ -1701,7 +1720,7 @@ array_array_tostring_impl(arrayobject *self) /*[clinic input] array.array.fromunicode - ustr: Py_UNICODE(zeroes=True) + arg: unicode / Extends this array with data from the unicode string ustr. @@ -1712,25 +1731,38 @@ some other type. 
[clinic start generated code]*/ static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length) -/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/ +array_array_fromunicode_impl(arrayobject *self, PyObject *arg) +/*[clinic end generated code: output=b691324745f39114 input=bc9c8ea0d901b328]*/ { - char typecode; + Py_ssize_t len; - typecode = self->ob_descr->typecode; + char typecode = self->ob_descr->typecode; if (typecode != 'u') { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } - if (ustr_length > 0) { + +#if USE_UNICODE_WCHAR_CACHE + len = PyUnicode_GetSize(arg); + if (len < 0) { + return NULL; + } +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(arg, NULL, 0); + if (len < 0) { + return NULL; + } + assert(len > 0); + len--; +#endif /* USE_UNICODE_WCHAR_CACHE */ + + if (len > 0) { Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + ustr_length) == -1) + if (array_resize(self, old_size + len) == -1) return NULL; - memcpy(self->ob_item + old_size * sizeof(Py_UNICODE), - ustr, ustr_length * sizeof(Py_UNICODE)); + PyUnicode_AsWideChar(arg, (wchar_t *)self->ob_item + old_size, len); } Py_RETURN_NONE; @@ -2711,8 +2743,9 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) { - Py_UNICODE *ustr; Py_ssize_t n; +#if USE_UNICODE_WCHAR_CACHE + Py_UNICODE *ustr; ustr = PyUnicode_AsUnicode(initial); if (ustr == NULL) { @@ -2736,6 +2769,30 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) memcpy(item, ustr, n); self->allocated = Py_SIZE(self); } +#else /* USE_UNICODE_WCHAR_CACHE */ + n = PyUnicode_AsWideChar(initial, NULL, 0); + if (n < 0) { + PyErr_NoMemory(); + Py_DECREF(a); + return NULL; + } + assert(n > 0); + n--; + if (n > 0) { + arrayobject *self = (arrayobject *)a; + char *item = 
self->ob_item; + item = (char *)PyMem_Realloc(item, n * sizeof(wchar_t)); + if (item == NULL) { + PyErr_NoMemory(); + Py_DECREF(a); + return NULL; + } + self->ob_item = item; + Py_SIZE(self) = n; + PyUnicode_AsWideChar(initial, (wchar_t*)item, n); + self->allocated = Py_SIZE(self); + } +#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (initial != NULL && array_Check(initial) && len > 0) { arrayobject *self = (arrayobject *)a; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index e21f2bc2b6fd6f..96af6530042996 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -195,8 +195,8 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na LPCWSTR name; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "" F_HANDLE "" F_POINTER "kkku:CreateFileMapping", - &file_handle, &security_attributes, &protect, &max_size_high, &max_size_low, &name)) { + if (!_PyArg_ParseStack(args, nargs, "" F_HANDLE "" F_POINTER "kkkO&:CreateFileMapping", + &file_handle, &security_attributes, &protect, &max_size_high, &max_size_low, _PyUnicode_WideCharString_Converter, &name)) { goto exit; } _return_value = _winapi_CreateFileMapping_impl(module, file_handle, security_attributes, protect, max_size_high, max_size_low, name); @@ -209,6 +209,11 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -221,23 +226,55 @@ PyDoc_STRVAR(_winapi_CreateJunction__doc__, {"CreateJunction", (PyCFunction)(void(*)(void))_winapi_CreateJunction, METH_FASTCALL, _winapi_CreateJunction__doc__}, static PyObject * -_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path, - LPWSTR dst_path); +_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, + LPCWSTR dst_path); static PyObject * 
_winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - LPWSTR src_path; - LPWSTR dst_path; + LPCWSTR src_path; + LPCWSTR dst_path; - if (!_PyArg_ParseStack(args, nargs, "uu:CreateJunction", - &src_path, &dst_path)) { + if (!_PyArg_CheckPositional("CreateJunction", nargs, 2, 2)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("CreateJunction", 1, "str", args[0]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + src_path = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + src_path = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (src_path == NULL) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("CreateJunction", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + dst_path = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + dst_path = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (dst_path == NULL) { goto exit; } return_value = _winapi_CreateJunction_impl(module, src_path, dst_path); exit: + /* Cleanup for src_path */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)src_path); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for dst_path */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)dst_path); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -367,13 +404,22 @@ _winapi_CreateProcess(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *current_directory; PyObject *startup_info; - if (!_PyArg_ParseStack(args, nargs, "ZOOOikOZO:CreateProcess", - &application_name, &command_line, &proc_attrs, &thread_attrs, &inherit_handles, &creation_flags, &env_mapping, &current_directory, &startup_info)) { + if (!_PyArg_ParseStack(args, nargs, "O&OOOikOO&O:CreateProcess", + _PyUnicode_WideCharString_Opt_Converter, &application_name, &command_line, &proc_attrs, &thread_attrs,
&inherit_handles, &creation_flags, &env_mapping, _PyUnicode_WideCharString_Opt_Converter, &current_directory, &startup_info)) { goto exit; } return_value = _winapi_CreateProcess_impl(module, application_name, command_line, proc_attrs, thread_attrs, inherit_handles, creation_flags, env_mapping, current_directory, startup_info); exit: + /* Cleanup for application_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)application_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for current_directory */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)current_directory); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -706,8 +752,8 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg LPCWSTR name; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "kiu:OpenFileMapping", - &desired_access, &inherit_handle, &name)) { + if (!_PyArg_ParseStack(args, nargs, "kiO&:OpenFileMapping", + &desired_access, &inherit_handle, _PyUnicode_WideCharString_Converter, &name)) { goto exit; } _return_value = _winapi_OpenFileMapping_impl(module, desired_access, inherit_handle, name); @@ -720,6 +766,11 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1097,4 +1148,4 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P exit: return return_value; } -/*[clinic end generated code: output=f3897898ea1da99d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b37c7990ebf35ed6 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index 5f45b7cf673473..e34afa21970d63 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -435,7 +435,7 @@ array_array_tostring(arrayobject *self,
PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(array_array_fromunicode__doc__, -"fromunicode($self, ustr, /)\n" +"fromunicode($self, arg, /)\n" "--\n" "\n" "Extends this array with data from the unicode string ustr.\n" @@ -448,20 +448,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__, {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__}, static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length); +array_array_fromunicode_impl(arrayobject *self, PyObject *arg); static PyObject * -array_array_fromunicode(arrayobject *self, PyObject *arg) +array_array_fromunicode(arrayobject *self, PyObject *arg_) { PyObject *return_value = NULL; - const Py_UNICODE *ustr; - Py_ssize_clean_t ustr_length; + PyObject *arg; - if (!PyArg_Parse(arg, "u#:fromunicode", &ustr, &ustr_length)) { + if (!PyUnicode_Check(arg_)) { + _PyArg_BadArgument("fromunicode", 0, "str", arg_); goto exit; } - return_value = array_array_fromunicode_impl(self, ustr, ustr_length); + if (PyUnicode_READY(arg_) == -1) { + goto exit; + } + arg = arg_; + return_value = array_array_fromunicode_impl(self, arg); exit: return return_value; @@ -599,4 +602,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=c9a40f11f1a866fb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d955bcda11d2bd74 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 55f2cbb91a083a..5bd415f87e32d9 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1757,12 +1757,25 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k { PyObject *return_value = NULL; static const char * const _keywords[] = {"command", NULL}; - 
static _PyArg_Parser _parser = {"u:system", _keywords, 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "system", 0}; + PyObject *argsbuf[1]; const Py_UNICODE *command; long _return_value; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &command)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("system", 1, "str", args[0]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + command = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + command = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (command == NULL) { goto exit; } _return_value = os_system_impl(module, command); @@ -1772,6 +1785,11 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k return_value = PyLong_FromLong(_return_value); exit: + /* Cleanup for command */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)command); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -6799,19 +6817,44 @@ os_startfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject { PyObject *return_value = NULL; static const char * const _keywords[] = {"filepath", "operation", NULL}; - static _PyArg_Parser _parser = {"O&|u:startfile", _keywords, 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "startfile", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t filepath = PATH_T_INITIALIZE("startfile", "filepath", 0, 0); const Py_UNICODE *operation = NULL; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - path_converter, &filepath, &operation)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &filepath)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("startfile", 2, "str", args[1]); goto exit; } + #if USE_UNICODE_WCHAR_CACHE + operation = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + operation = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (operation == NULL) { + goto exit; + } +skip_optional_pos: return_value = os_startfile_impl(module, &filepath, operation); exit: /* Cleanup for filepath */ path_cleanup(&filepath); + /* Cleanup for operation */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)operation); + #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -8480,4 +8523,4 @@ os_getrandom(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject #ifndef OS_GETRANDOM_METHODDEF #define OS_GETRANDOM_METHODDEF #endif /* !defined(OS_GETRANDOM_METHODDEF) */ -/*[clinic end generated code: output=1a9c62f5841221ae input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bf771259c746b660 input=a9049054013a1b77]*/ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index e5a209bf758297..6b407a4e6566f7 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -356,23 +356,44 @@ overlapped_CreateEvent(PyObject *self, PyObject *args) PyObject *EventAttributes; BOOL ManualReset; BOOL InitialState; + PyObject *Name_obj = NULL; Py_UNICODE *Name; HANDLE Event; - if (!PyArg_ParseTuple(args, "O" F_BOOL F_BOOL "Z", + if (!PyArg_ParseTuple(args, "O" F_BOOL F_BOOL "O", &EventAttributes, 
&ManualReset, - &InitialState, &Name)) + &InitialState, &Name_obj)) return NULL; if (EventAttributes != Py_None) { PyErr_SetString(PyExc_ValueError, "EventAttributes must be None"); return NULL; } + if (Name_obj == Py_None) { + Name = NULL; + } + else if (PyUnicode_Check(Name_obj)) { +#if USE_UNICODE_WCHAR_CACHE + Name = (wchar_t *)_PyUnicode_AsUnicode(Name_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Name = PyUnicode_AsWideCharString(Name_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Name == NULL) { + return NULL; + } + } + else { + _PyArg_BadArgument("CreateEvent", 4, "str or None", Name_obj); + return NULL; + } Py_BEGIN_ALLOW_THREADS Event = CreateEventW(NULL, ManualReset, InitialState, Name); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(Name); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (Event == NULL) return SetFromWindowsErr(0); return Py_BuildValue(F_HANDLE, Event); @@ -1085,6 +1106,7 @@ Overlapped_AcceptEx(OverlappedObject *self, PyObject *args) static int parse_address(PyObject *obj, SOCKADDR *Address, int Length) { + PyObject *Host_obj; Py_UNICODE *Host; unsigned short Port; unsigned long FlowInfo; @@ -1092,33 +1114,66 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) memset(Address, 0, Length); - if (PyArg_ParseTuple(obj, "uH", &Host, &Port)) - { + switch (PyTuple_GET_SIZE(obj)) { + case 2: { + if (!PyArg_ParseTuple(obj, "UH", &Host_obj, &Port)) { + return -1; + } +#if USE_UNICODE_WCHAR_CACHE + Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Host = PyUnicode_AsWideCharString(Host_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Host == NULL) { + return -1; + } Address->sa_family = AF_INET; if (WSAStringToAddressW(Host, AF_INET, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); - return -1; + Length = -1; } - ((SOCKADDR_IN*)Address)->sin_port = htons(Port); + else { + ((SOCKADDR_IN*)Address)->sin_port = htons(Port); + } +#if !USE_UNICODE_WCHAR_CACHE + 
PyMem_Free(Host); +#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } - else if (PyArg_ParseTuple(obj, - "uHkk;ConnectEx(): illegal address_as_bytes " - "argument", &Host, &Port, &FlowInfo, &ScopeId)) - { - PyErr_Clear(); + case 4: { + if (!PyArg_ParseTuple(obj, + "UHkk;ConnectEx(): illegal address_as_bytes argument", + &Host_obj, &Port, &FlowInfo, &ScopeId)) + { + return -1; + } +#if USE_UNICODE_WCHAR_CACHE + Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Host = PyUnicode_AsWideCharString(Host_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Host == NULL) { + return -1; + } Address->sa_family = AF_INET6; if (WSAStringToAddressW(Host, AF_INET6, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); - return -1; + Length = -1; + } + else { + ((SOCKADDR_IN6*)Address)->sin6_port = htons(Port); + ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; + ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; } - ((SOCKADDR_IN6*)Address)->sin6_port = htons(Port); - ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; - ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(Host); +#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } - - return -1; + default: + PyErr_SetString(PyExc_ValueError, "illegal address_as_bytes argument"); + return -1; + } } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 3f760183575aac..f225d70f91cbaa 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -900,6 +900,10 @@ typedef struct { static void path_cleanup(path_t *path) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((wchar_t *)path->wide); + path->wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ Py_CLEAR(path->object); Py_CLEAR(path->cleanup); } @@ -914,7 +918,7 @@ path_converter(PyObject *o, void *p) const char *narrow; #ifdef MS_WINDOWS PyObject *wo = NULL; - const wchar_t *wide; + wchar_t *wide = NULL; #endif #define FORMAT_EXCEPTION(exc, fmt) \ @@ -988,7 
+992,11 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS +#if USE_UNICODE_WCHAR_CACHE wide = PyUnicode_AsUnicodeAndSize(o, &length); +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = PyUnicode_AsWideCharString(o, &length); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1004,6 +1012,9 @@ path_converter(PyObject *o, void *p) path->wide = wide; path->narrow = FALSE; path->fd = -1; +#if !USE_UNICODE_WCHAR_CACHE + wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ goto success_exit; #else if (!PyUnicode_FSConverter(o, &bytes)) { @@ -1079,7 +1090,12 @@ path_converter(PyObject *o, void *p) goto error_exit; } +#if USE_UNICODE_WCHAR_CACHE wide = PyUnicode_AsUnicodeAndSize(wo, &length); +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = PyUnicode_AsWideCharString(wo, &length); + Py_DECREF(wo); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1093,8 +1109,12 @@ path_converter(PyObject *o, void *p) } path->wide = wide; path->narrow = TRUE; - path->cleanup = wo; Py_DECREF(bytes); +#if USE_UNICODE_WCHAR_CACHE + path->cleanup = wo; +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ #else path->wide = NULL; path->narrow = narrow; @@ -1118,7 +1138,11 @@ path_converter(PyObject *o, void *p) Py_XDECREF(o); Py_XDECREF(bytes); #ifdef MS_WINDOWS +#if USE_UNICODE_WCHAR_CACHE Py_XDECREF(wo); +#else /* USE_UNICODE_WCHAR_CACHE */ + PyMem_Free(wide); +#endif /* USE_UNICODE_WCHAR_CACHE */ #endif return 0; } @@ -9722,7 +9746,6 @@ static PyObject * os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) /*[clinic end generated code: output=d29a567d6b2327d2 input=ba586581c2e6105f]*/ { - const wchar_t *env; Py_ssize_t size; /* Search from index 1 because on Windows starting '=' is allowed for @@ -9733,14 +9756,33 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) PyErr_SetString(PyExc_ValueError, "illegal environment variable name"); return NULL; } - PyObject 
*unicode = PyUnicode_FromFormat("%U=%U", name, value); - if (unicode == NULL) { + PyObject *buffer = PyUnicode_FromFormat("%U=%U", name, value); + if (buffer == NULL) { return NULL; } - env = PyUnicode_AsUnicodeAndSize(unicode, &size); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *env = PyUnicode_AsUnicodeAndSize(buffer, &size); if (env == NULL) goto error; +#else /* USE_UNICODE_WCHAR_CACHE */ + size = PyUnicode_AsWideChar(buffer, NULL, 0); + if (size < 0) { + return NULL; + } + if ((size_t)size > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) { + return PyErr_NoMemory(); + } + PyObject *bytes = PyBytes_FromStringAndSize(NULL, size * sizeof(wchar_t)); + if (bytes == NULL) { + goto error; + } + wchar_t *env = (wchar_t *)PyBytes_AS_STRING(bytes); + size = PyUnicode_AsWideChar(buffer, env, size); + assert(size >= 0); + Py_DECREF(buffer); + buffer = bytes; +#endif /* USE_UNICODE_WCHAR_CACHE */ if (size > _MAX_ENV) { PyErr_Format(PyExc_ValueError, "the environment variable is longer than %u characters", @@ -9757,11 +9799,11 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value) goto error; } - posix_putenv_garbage_setitem(name, unicode); + posix_putenv_garbage_setitem(name, buffer); Py_RETURN_NONE; error: - Py_DECREF(unicode); + Py_DECREF(buffer); return NULL; } #else /* MS_WINDOWS */ @@ -12178,7 +12220,12 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) #ifdef MS_WINDOWS if (!PyUnicode_FSDecoder(self->path, &ub)) return NULL; +#if USE_UNICODE_WCHAR_CACHE const wchar_t *path = PyUnicode_AsUnicode(ub); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *path = PyUnicode_AsWideCharString(ub, NULL); + Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ #else /* POSIX */ if (!PyUnicode_FSConverter(self->path, &ub)) return NULL; @@ -12188,6 +12235,11 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) result = fstatat(self->dir_fd, path, &st, follow_symlinks ? 
0 : AT_SYMLINK_NOFOLLOW); #else +#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE + PyMem_Free(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); return NULL; #endif /* HAVE_FSTATAT */ @@ -12200,7 +12252,11 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) else result = LSTAT(path, &st); } +#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE + PyMem_Free(path); +#else /* USE_UNICODE_WCHAR_CACHE */ Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); @@ -12373,15 +12429,21 @@ os_DirEntry_inode_impl(DirEntry *self) #ifdef MS_WINDOWS if (!self->got_file_index) { PyObject *unicode; - const wchar_t *path; STRUCT_STAT stat; int result; if (!PyUnicode_FSDecoder(self->path, &unicode)) return NULL; - path = PyUnicode_AsUnicode(unicode); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *path = PyUnicode_AsUnicode(unicode); result = LSTAT(path, &stat); Py_DECREF(unicode); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *path = PyUnicode_AsWideCharString(unicode, NULL); + Py_DECREF(unicode); + result = LSTAT(path, &stat); + PyMem_Free(path); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); @@ -12966,10 +13028,9 @@ os_scandir_impl(PyObject *module, path_t *path) iterator->dirp = NULL; #endif - memcpy(&iterator->path, path, sizeof(path_t)); /* Move the ownership to iterator->path */ - path->object = NULL; - path->cleanup = NULL; + memcpy(&iterator->path, path, sizeof(path_t)); + memset(path, 0, sizeof(path_t)); #ifdef MS_WINDOWS iterator->first_time = 1; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8141ce757412cb..8f2f62fcbec8b4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -108,10 +108,12 @@ extern "C" { PyUnicode_IS_COMPACT_ASCII(op) ? 
\ ((PyASCIIObject*)(op))->length : \ _PyUnicode_UTF8_LENGTH(op)) +#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_WSTR(op) \ (((PyASCIIObject*)(op))->wstr) #define _PyUnicode_WSTR_LENGTH(op) \ (((PyCompactUnicodeObject*)(op))->wstr_length) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ #define _PyUnicode_LENGTH(op) \ (((PyASCIIObject *)(op))->length) #define _PyUnicode_STATE(op) \ @@ -138,9 +140,11 @@ extern "C" { (assert(_PyUnicode_CHECK(op)), \ assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) +#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_SHARE_WSTR(op) \ (assert(_PyUnicode_CHECK(op)), \ (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ @@ -149,12 +153,14 @@ extern "C" { && _PyUnicode_UTF8(op) \ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) +#if HAVE_UNICODE_WCHAR_CACHE /* true if the Unicode object has an allocated wstr memory block (not shared with other data) */ #define _PyUnicode_HAS_WSTR_MEMORY(op) \ ((_PyUnicode_WSTR(op) && \ (!PyUnicode_IS_READY(op) || \ _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Generic helper macro to convert characters of different types. 
from_type and to_type have to be valid type names, begin and end @@ -431,6 +437,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) PyUnicodeObject *unicode = (PyUnicodeObject *)op; data = unicode->data.any; +#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { ASSERT(ascii->length == 0); ASSERT(ascii->hash == -1); @@ -442,7 +449,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) ASSERT(data == NULL); ASSERT(compact->utf8 == NULL); } - else { + else +#endif /* HAVE_UNICODE_WCHAR_CACHE */ + { ASSERT(kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); @@ -457,6 +466,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) ASSERT (compact->utf8 != data); } } +#if HAVE_UNICODE_WCHAR_CACHE if (kind != PyUnicode_WCHAR_KIND) { if ( #if SIZEOF_WCHAR_T == 2 @@ -471,11 +481,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) } else ASSERT(ascii->wstr != data); } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (compact->utf8 == NULL) ASSERT(compact->utf8_length == 0); +#if HAVE_UNICODE_WCHAR_CACHE if (ascii->wstr == NULL) ASSERT(compact->wstr_length == 0); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /* check that the best kind is used */ if (check_content && kind != PyUnicode_WCHAR_KIND) @@ -857,11 +870,13 @@ ensure_unicode(PyObject *obj) #include "stringlib/find_max_char.h" #include "stringlib/undef.h" +#if HAVE_UNICODE_WCHAR_CACHE #include "stringlib/unicodedefs.h" #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/undef.h" +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Unicode Object ----------------------------------------------------- */ @@ -920,7 +935,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; Py_ssize_t struct_size; Py_ssize_t new_size; - int share_wstr; PyObject *new_unicode; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); @@ -935,7 +949,9 @@ 
resize_compact(PyObject *unicode, Py_ssize_t length) struct_size = sizeof(PyASCIIObject); else struct_size = sizeof(PyCompactUnicodeObject); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#if HAVE_UNICODE_WCHAR_CACHE + int share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); @@ -961,6 +977,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_NewReference(unicode); _PyUnicode_LENGTH(unicode) = length; +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); if (!PyUnicode_IS_ASCII(unicode)) @@ -972,6 +989,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = 0; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif @@ -984,14 +1002,13 @@ resize_compact(PyObject *unicode, Py_ssize_t length) static int resize_inplace(PyObject *unicode, Py_ssize_t length) { - wchar_t *wstr; Py_ssize_t new_size; assert(!PyUnicode_IS_COMPACT(unicode)); assert(Py_REFCNT(unicode) == 1); if (PyUnicode_IS_READY(unicode)) { Py_ssize_t char_size; - int share_wstr, share_utf8; + int share_utf8; void *data; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); @@ -999,7 +1016,9 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) data = _PyUnicode_DATA_ANY(unicode); char_size = PyUnicode_KIND(unicode); - share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#if HAVE_UNICODE_WCHAR_CACHE + int share_wstr = _PyUnicode_SHARE_WSTR(unicode); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ share_utf8 = _PyUnicode_SHARE_UTF8(unicode); if (length > (PY_SSIZE_T_MAX / char_size - 1)) { @@ -1021,10 +1040,12 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } _PyUnicode_DATA_ANY(unicode) = data; +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR(unicode) = data; _PyUnicode_WSTR_LENGTH(unicode) 
= length; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (share_utf8) { _PyUnicode_UTF8(unicode) = data; _PyUnicode_UTF8_LENGTH(unicode) = length; @@ -1034,11 +1055,16 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { +#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; +#if HAVE_UNICODE_WCHAR_CACHE } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } +#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ @@ -1047,7 +1073,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } new_size = sizeof(wchar_t) * (length + 1); - wstr = _PyUnicode_WSTR(unicode); + wchar_t *wstr = _PyUnicode_WSTR(unicode); wstr = PyObject_REALLOC(wstr, new_size); if (!wstr) { PyErr_NoMemory(); @@ -1056,6 +1082,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) _PyUnicode_WSTR(unicode) = wstr; _PyUnicode_WSTR(unicode)[length] = 0; _PyUnicode_WSTR_LENGTH(unicode) = length; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; } @@ -1064,20 +1091,8 @@ static PyObject* resize_copy(PyObject *unicode, Py_ssize_t length) { Py_ssize_t copy_length; - if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { - PyObject *copy; - - assert(PyUnicode_IS_READY(unicode)); - - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (copy == NULL) - return NULL; - - copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); - return copy; - } - else { +#if HAVE_UNICODE_WCHAR_CACHE + if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) { PyObject *w; w = (PyObject*)_PyUnicode_New(length); @@ -1089,6 +1104,18 @@ resize_copy(PyObject *unicode, Py_ssize_t length) copy_length * sizeof(wchar_t)); return w; } +#endif /* 
HAVE_UNICODE_WCHAR_CACHE */ + PyObject *copy; + + assert(PyUnicode_IS_READY(unicode)); + + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (copy == NULL) + return NULL; + + copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); + return copy; } /* We allocate one more byte to make sure the string is @@ -1103,15 +1130,13 @@ resize_copy(PyObject *unicode, Py_ssize_t length) static PyUnicodeObject * _PyUnicode_New(Py_ssize_t length) { - PyUnicodeObject *unicode; - size_t new_size; - /* Optimization for empty strings */ - if (length == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return (PyUnicodeObject*)unicode_empty; + if (length == 0) { + _Py_INCREF_UNICODE_EMPTY(); + return (PyUnicodeObject *)unicode_empty; } +#if HAVE_UNICODE_WCHAR_CACHE /* Ensure we won't overflow the size. */ if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { return (PyUnicodeObject *)PyErr_NoMemory(); @@ -1122,10 +1147,10 @@ _PyUnicode_New(Py_ssize_t length) return NULL; } - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); + PyUnicodeObject *unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) return NULL; - new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); _PyUnicode_WSTR_LENGTH(unicode) = length; _PyUnicode_HASH(unicode) = -1; @@ -1158,6 +1183,11 @@ _PyUnicode_New(Py_ssize_t length) assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); return unicode; +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_SetString(PyExc_SystemError, + "_PyUnicode_New() with non-zero size is not supported"); + return NULL; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } static const char* @@ -1239,15 +1269,21 @@ _PyUnicode_Dump(PyObject *op) } else data = unicode->data.any; - printf("%s: len=%" PY_FORMAT_SIZE_T "u, ", + printf("%s: len=%" PY_FORMAT_SIZE_T "u", unicode_kind_name(op), 
ascii->length); +#if HAVE_UNICODE_WCHAR_CACHE + printf(", "); if (ascii->wstr == data) printf("shared "); printf("wstr=%p", ascii->wstr); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { - printf(" (%" PY_FORMAT_SIZE_T "u), ", compact->wstr_length); +#if HAVE_UNICODE_WCHAR_CACHE + printf(" (%" PY_FORMAT_SIZE_T "u)", compact->wstr_length); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ + printf(", "); if (!ascii->state.compact && compact->utf8 == unicode->data.any) printf("shared "); printf("utf8=%p (%" PY_FORMAT_SIZE_T "u)", @@ -1264,7 +1300,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) PyCompactUnicodeObject *unicode; void *data; enum PyUnicode_Kind kind; - int is_sharing, is_ascii; + int is_ascii; Py_ssize_t char_size; Py_ssize_t struct_size; @@ -1275,23 +1311,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) } is_ascii = 0; - is_sharing = 0; struct_size = sizeof(PyCompactUnicodeObject); if (maxchar < 128) { kind = PyUnicode_1BYTE_KIND; - char_size = 1; is_ascii = 1; struct_size = sizeof(PyASCIIObject); } else if (maxchar < 256) { kind = PyUnicode_1BYTE_KIND; - char_size = 1; } else if (maxchar < 65536) { kind = PyUnicode_2BYTE_KIND; - char_size = 2; - if (sizeof(wchar_t) == 2) - is_sharing = 1; } else { if (maxchar > MAX_UNICODE) { @@ -1300,10 +1330,11 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) return NULL; } kind = PyUnicode_4BYTE_KIND; - char_size = 4; - if (sizeof(wchar_t) == 4) - is_sharing = 1; } + char_size = kind; +#if HAVE_UNICODE_WCHAR_CACHE + int is_sharing = (sizeof(wchar_t) == kind); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the size. 
*/ if (size < 0) { @@ -1339,29 +1370,35 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) _PyUnicode_STATE(unicode).ascii = is_ascii; if (is_ascii) { ((char*)data)[size] = 0; +#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR(unicode) = NULL; - } - else if (kind == PyUnicode_1BYTE_KIND) { - ((char*)data)[size] = 0; - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; - unicode->utf8 = NULL; - unicode->utf8_length = 0; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } else { unicode->utf8 = NULL; unicode->utf8_length = 0; - if (kind == PyUnicode_2BYTE_KIND) - ((Py_UCS2*)data)[size] = 0; - else /* kind == PyUnicode_4BYTE_KIND */ - ((Py_UCS4*)data)[size] = 0; - if (is_sharing) { - _PyUnicode_WSTR_LENGTH(unicode) = size; - _PyUnicode_WSTR(unicode) = (wchar_t *)data; + if (kind == PyUnicode_1BYTE_KIND) { + ((char*)data)[size] = 0; +#if HAVE_UNICODE_WCHAR_CACHE + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } else { - _PyUnicode_WSTR_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = NULL; + if (kind == PyUnicode_2BYTE_KIND) + ((Py_UCS2*)data)[size] = 0; + else /* kind == PyUnicode_4BYTE_KIND */ + ((Py_UCS4*)data)[size] = 0; +#if HAVE_UNICODE_WCHAR_CACHE + if (is_sharing) { + _PyUnicode_WSTR_LENGTH(unicode) = size; + _PyUnicode_WSTR(unicode) = (wchar_t *)data; + } + else { + _PyUnicode_WSTR_LENGTH(unicode) = 0; + _PyUnicode_WSTR(unicode) = NULL; + } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } } #ifdef Py_DEBUG @@ -1675,12 +1712,14 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, int _PyUnicode_Ready(PyObject *unicode) { +#if HAVE_UNICODE_WCHAR_CACHE wchar_t *end; Py_UCS4 maxchar = 0; Py_ssize_t num_surrogates; #if SIZEOF_WCHAR_T == 2 Py_ssize_t length_wo_surrogates; #endif +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* _PyUnicode_Ready() is only intended for old-style API usage where strings were created using _PyObject_New() and where no canonical @@ -1688,12 +1727,15 @@ 
_PyUnicode_Ready(PyObject *unicode) which are not yet ready. */ assert(_PyUnicode_CHECK(unicode)); assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); +#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_DATA_ANY(unicode) == NULL); assert(_PyUnicode_UTF8(unicode) == NULL); /* Actually, it should neither be interned nor be anything else: */ assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); +#if HAVE_UNICODE_WCHAR_CACHE end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, &maxchar, &num_surrogates) == -1) @@ -1796,6 +1838,7 @@ _PyUnicode_Ready(PyObject *unicode) #endif PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_STATE(unicode).ready = 1; assert(_PyUnicode_CheckConsistency(unicode, 1)); return 0; @@ -1824,8 +1867,10 @@ unicode_dealloc(PyObject *unicode) Py_FatalError("Inconsistent interned string state."); } +#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) PyObject_DEL(_PyUnicode_WSTR(unicode)); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) PyObject_DEL(_PyUnicode_UTF8(unicode)); if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) @@ -1883,9 +1928,11 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) assert(PyUnicode_Check(unicode)); assert(0 <= length); +#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) old_length = PyUnicode_WSTR_LENGTH(unicode); else +#endif /* HAVE_UNICODE_WCHAR_CACHE */ old_length = PyUnicode_GET_LENGTH(unicode); if (old_length == length) return 0; @@ -2975,9 +3022,11 @@ unicode_get_widechar_size(PyObject *unicode) assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); +#if USE_UNICODE_WCHAR_CACHE if (_PyUnicode_WSTR(unicode) != NULL) { return PyUnicode_WSTR_LENGTH(unicode); } +#endif /* 
USE_UNICODE_WCHAR_CACHE */ assert(PyUnicode_IS_READY(unicode)); res = _PyUnicode_LENGTH(unicode); @@ -2998,16 +3047,21 @@ unicode_get_widechar_size(PyObject *unicode) static void unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size) { - const wchar_t *wstr; - assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); - wstr = _PyUnicode_WSTR(unicode); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wstr = _PyUnicode_WSTR(unicode); if (wstr != NULL) { memcpy(w, wstr, size * sizeof(wchar_t)); return; } +#else /* USE_UNICODE_WCHAR_CACHE */ + if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) { + memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t)); + return; + } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(PyUnicode_IS_READY(unicode)); if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { @@ -3122,6 +3176,74 @@ PyUnicode_AsWideCharString(PyObject *unicode, #endif /* HAVE_WCHAR_H */ +int +_PyUnicode_WideCharString_Converter(PyObject *obj, void *ptr) +{ + wchar_t **p = (wchar_t **)ptr; + if (obj == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(*p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + *p = NULL; + return 1; + } + if (PyUnicode_Check(obj)) { +#if USE_UNICODE_WCHAR_CACHE + *p = (wchar_t *)_PyUnicode_AsUnicode(obj); + if (*p == NULL) { + return 0; + } + return 1; +#else /* USE_UNICODE_WCHAR_CACHE */ + *p = PyUnicode_AsWideCharString(obj, NULL); + if (*p == NULL) { + return 0; + } + return Py_CLEANUP_SUPPORTED; +#endif /* USE_UNICODE_WCHAR_CACHE */ + } + PyErr_Format(PyExc_TypeError, + "argument must be str, not %.50s", + obj->ob_type->tp_name); + return 0; +} + +int +_PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr) +{ + wchar_t **p = (wchar_t **)ptr; + if (obj == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(*p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + *p = NULL; + return 1; + } + if (obj == Py_None) { + *p = NULL; + return 1; + } + if (PyUnicode_Check(obj)) { +#if USE_UNICODE_WCHAR_CACHE + *p = (wchar_t 
*)_PyUnicode_AsUnicode(obj); + if (*p == NULL) { + return 0; + } + return 1; +#else /* USE_UNICODE_WCHAR_CACHE */ + *p = PyUnicode_AsWideCharString(obj, NULL); + if (*p == NULL) { + return 0; + } + return Py_CLEANUP_SUPPORTED; +#endif /* USE_UNICODE_WCHAR_CACHE */ + } + PyErr_Format(PyExc_TypeError, + "argument must be str or None, not %.50s", + obj->ob_type->tp_name); + return 0; +} + PyObject * PyUnicode_FromOrdinal(int ordinal) { @@ -3906,6 +4028,7 @@ PyUnicode_AsUTF8(PyObject *unicode) Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) { +#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); return NULL; @@ -3935,6 +4058,10 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) if (size != NULL) *size = PyUnicode_WSTR_LENGTH(unicode); return w; +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_SetString(PyExc_SystemError, "PyUnicode_AsUnicodeAndSize is not supported"); + return NULL; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } Py_UNICODE * @@ -3961,6 +4088,7 @@ _PyUnicode_AsUnicode(PyObject *unicode) Py_ssize_t PyUnicode_GetSize(PyObject *unicode) { +#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); goto onError; @@ -3973,6 +4101,10 @@ PyUnicode_GetSize(PyObject *unicode) onError: return -1; +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_SetString(PyExc_SystemError, "PyUnicode_GetSize is not supported"); + return -1; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } Py_ssize_t @@ -4104,7 +4236,6 @@ unicode_decode_call_errorhandler_wchar( Py_ssize_t requiredsize; Py_ssize_t newpos; PyObject *inputobj = NULL; - wchar_t *repwstr; Py_ssize_t repwlen; if (*errorHandler == NULL) { @@ -4150,9 +4281,16 @@ unicode_decode_call_errorhandler_wchar( goto onError; } - repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); - if (repwstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + repwlen = PyUnicode_GetSize(repunicode); + if (repwlen < 0) goto onError; +#else /* USE_UNICODE_WCHAR_CACHE */ + 
repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0); + if (repwlen < 0) + goto onError; + repwlen--; +#endif /* USE_UNICODE_WCHAR_CACHE */ /* need more space? (at least enough for what we have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space @@ -4172,7 +4310,7 @@ unicode_decode_call_errorhandler_wchar( goto onError; } } - wcsncpy(*buf + *outpos, repwstr, repwlen); + PyUnicode_AsWideChar(repunicode, *buf + *outpos, repwlen); *outpos += repwlen; *endinpos = newpos; *inptr = *input + newpos; @@ -7461,6 +7599,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, /* Create a substring so that we can get the UTF-16 representation of just the slice under consideration. */ PyObject *substring; + int ret = -1; assert(len > 0); @@ -7472,11 +7611,19 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, substring = PyUnicode_Substring(unicode, offset, offset+len); if (substring == NULL) return -1; +#if USE_UNICODE_WCHAR_CACHE p = PyUnicode_AsUnicodeAndSize(substring, &size); if (p == NULL) { Py_DECREF(substring); return -1; } +#else /* USE_UNICODE_WCHAR_CACHE */ + p = PyUnicode_AsWideCharString(substring, &size); + Py_CLEAR(substring); + if (p == NULL) { + return -1; + } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(size <= INT_MAX); /* First get the size of the result */ @@ -7488,16 +7635,15 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, goto error; /* If we used a default char, then we failed! 
*/ if (pusedDefaultChar && *pusedDefaultChar) { - Py_DECREF(substring); - return -2; + ret = -2; + goto done; } if (*outbytes == NULL) { /* Create string object */ *outbytes = PyBytes_FromStringAndSize(NULL, outsize); if (*outbytes == NULL) { - Py_DECREF(substring); - return -1; + goto done; } out = PyBytes_AS_STRING(*outbytes); } @@ -7506,12 +7652,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, const Py_ssize_t n = PyBytes_Size(*outbytes); if (outsize > PY_SSIZE_T_MAX - n) { PyErr_NoMemory(); - Py_DECREF(substring); - return -1; + goto done; } if (_PyBytes_Resize(outbytes, n + outsize) < 0) { - Py_DECREF(substring); - return -1; + goto done; } out = PyBytes_AS_STRING(*outbytes) + n; } @@ -7521,19 +7665,29 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, p, (int)size, out, outsize, NULL, pusedDefaultChar); - Py_CLEAR(substring); if (outsize <= 0) goto error; - if (pusedDefaultChar && *pusedDefaultChar) - return -2; - return 0; + if (pusedDefaultChar && *pusedDefaultChar) { + ret = -2; + goto done; + } + ret = 0; + +done: +#if USE_UNICODE_WCHAR_CACHE + Py_DECREF(substring); +#else /* USE_UNICODE_WCHAR_CACHE */ + PyMem_Free(p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return ret; error: - Py_XDECREF(substring); - if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) - return -2; + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) { + ret = -2; + goto done; + } PyErr_SetFromWindowsErr(0); - return -1; + goto done; } /* @@ -11019,10 +11173,11 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) Py_ssize_t i; int kind; Py_UCS4 chr; - const unsigned char *ustr = (const unsigned char *)str; assert(_PyUnicode_CHECK(uni)); +#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_IS_READY(uni)) { + const unsigned char *ustr = (const unsigned char *)str; const wchar_t *ws = _PyUnicode_WSTR(uni); /* Compare Unicode string and source character set string */ for (i = 0; (chr = ws[i]) && ustr[i]; i++) { @@ -11037,6 +11192,7 @@ 
PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) return -1; /* str is longer */ return 0; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { const void *data = PyUnicode_1BYTE_DATA(uni); @@ -11074,6 +11230,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) } } +#if HAVE_UNICODE_WCHAR_CACHE static int non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) { @@ -11091,6 +11248,7 @@ non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) } return 1; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ int _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) @@ -11103,11 +11261,13 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) assert((unsigned char)*p < 128); } #endif +#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(unicode) == -1) { /* Memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(unicode, str); } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(unicode)) return 0; len = (size_t)PyUnicode_GET_LENGTH(unicode); @@ -11129,11 +11289,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) } #endif +#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(left) == -1) { /* memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(left, right->string); } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(left)) return 0; @@ -11403,7 +11565,7 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) } /* -Wraps stringlib_parse_args_finds() and additionally ensures that the +Wraps asciilib_parse_args_finds() and additionally ensures that the first argument is a unicode object. 
*/ @@ -11412,7 +11574,7 @@ parse_args_finds_unicode(const char * function_name, PyObject *args, PyObject **substring, Py_ssize_t *start, Py_ssize_t *end) { - if(stringlib_parse_args_finds(function_name, args, substring, + if(asciilib_parse_args_finds(function_name, args, substring, start, end)) { if (ensure_unicode(*substring) < 0) return 0; @@ -13387,7 +13549,7 @@ unicode_startswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) + if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13441,7 +13603,7 @@ unicode_endswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) + if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13875,10 +14037,12 @@ unicode_sizeof_impl(PyObject *self) size += (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); } +#if HAVE_UNICODE_WCHAR_CACHE /* If the wstr pointer is present, account for it unless it is shared with the data pointer. Check if the data is not shared. 
*/ if (_PyUnicode_HAS_WSTR_MEMORY(self)) size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(self)) size += PyUnicode_UTF8_LENGTH(self) + 1; @@ -15043,8 +15207,8 @@ static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *unicode, *self; - Py_ssize_t length, char_size; - int share_wstr, share_utf8; + Py_ssize_t length; + int share_utf8; unsigned int kind; void *data; @@ -15078,37 +15242,35 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; _PyUnicode_STATE(self).ready = 1; - _PyUnicode_WSTR(self) = NULL; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; +#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR_LENGTH(self) = 0; + _PyUnicode_WSTR(self) = NULL; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; - share_wstr = 0; +#if HAVE_UNICODE_WCHAR_CACHE + int share_wstr = 0; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (kind == PyUnicode_1BYTE_KIND) { - char_size = 1; if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) share_utf8 = 1; } - else if (kind == PyUnicode_2BYTE_KIND) { - char_size = 2; - if (sizeof(wchar_t) == 2) - share_wstr = 1; - } +#if HAVE_UNICODE_WCHAR_CACHE else { - assert(kind == PyUnicode_4BYTE_KIND); - char_size = 4; - if (sizeof(wchar_t) == 4) + if (sizeof(wchar_t) == kind) share_wstr = 1; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the length. 
*/ - if (length > (PY_SSIZE_T_MAX / char_size - 1)) { + if (length > (PY_SSIZE_T_MAX / kind - 1)) { PyErr_NoMemory(); goto onError; } - data = PyObject_MALLOC((length + 1) * char_size); + data = PyObject_MALLOC((length + 1) * kind); if (data == NULL) { PyErr_NoMemory(); goto onError; @@ -15119,10 +15281,12 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_UTF8_LENGTH(self) = length; _PyUnicode_UTF8(self) = data; } +#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR_LENGTH(self) = length; _PyUnicode_WSTR(self) = (wchar_t *)data; } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1)); @@ -15449,7 +15613,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), it->it_seq, it->it_index); } else { - PyObject *u = (PyObject *)_PyUnicode_New(0); + PyObject *u = PyUnicode_New(0, 0); if (u == NULL) return NULL; return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); @@ -15631,30 +15795,12 @@ Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) Py_UNICODE* PyUnicode_AsUnicodeCopy(PyObject *unicode) { - Py_UNICODE *u, *copy; - Py_ssize_t len, size; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - u = PyUnicode_AsUnicodeAndSize(unicode, &len); - if (u == NULL) - return NULL; - /* Ensure we won't overflow the size. 
*/ - if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { - PyErr_NoMemory(); - return NULL; - } - size = len + 1; /* copy the null character */ - size *= sizeof(Py_UNICODE); - copy = PyMem_Malloc(size); - if (copy == NULL) { - PyErr_NoMemory(); + Py_ssize_t len; +#if USE_UNICODE_WCHAR_CACHE + if (PyUnicode_AsUnicode(unicode) == NULL) return NULL; - } - memcpy(copy, u, size); - return copy; +#endif /* USE_UNICODE_WCHAR_CACHE */ + return PyUnicode_AsWideCharString(unicode, &len); } /* A _string module, to export formatter_parser and formatter_field_name_split diff --git a/PC/_msi.c b/PC/_msi.c index ae30acbc9b48d4..0cd5398ec67552 100644 --- a/PC/_msi.c +++ b/PC/_msi.c @@ -421,11 +421,21 @@ record_setstring(msiobj* record, PyObject *args) { int status; int field; + PyObject *data_obj; wchar_t *data; - if (!PyArg_ParseTuple(args, "iu:SetString", &field, &data)) + if (!PyArg_ParseTuple(args, "iU:SetString", &field, &data_obj)) return NULL; +#if USE_UNICODE_WCHAR_CACHE + data = (wchar_t *)_PyUnicode_AsUnicode(data_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + data = PyUnicode_AsWideCharString(data_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (data == NULL) { + return NULL; + } + if ((status = MsiRecordSetStringW(record->h, field, data)) != ERROR_SUCCESS) return msierror(status); @@ -622,12 +632,19 @@ summary_setproperty(msiobj* si, PyObject *args) return NULL; if (PyUnicode_Check(data)) { +#if USE_UNICODE_WCHAR_CACHE const WCHAR *value = _PyUnicode_AsUnicode(data); +#else /* USE_UNICODE_WCHAR_CACHE */ + WCHAR *value = PyUnicode_AsWideCharString(data, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { return NULL; } status = MsiSummaryInfoSetPropertyW(si->h, field, VT_LPSTR, 0, NULL, value); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (PyLong_CheckExact(data)) { long value = PyLong_AsLong(data); if (value == -1 && PyErr_Occurred()) { diff --git a/PC/clinic/winreg.c.h 
b/PC/clinic/winreg.c.h index 50210250ed1967..666108e481d292 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -152,8 +152,27 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs HKEY key; HKEY _return_value; - if (!_PyArg_ParseStack(args, nargs, "ZO&:ConnectRegistry", - &computer_name, clinic_HKEY_converter, &key)) { + if (!_PyArg_CheckPositional("ConnectRegistry", nargs, 2, 2)) { + goto exit; + } + if (args[0] == Py_None) { + computer_name = NULL; + } + else if (PyUnicode_Check(args[0])) { + #if USE_UNICODE_WCHAR_CACHE + computer_name = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + computer_name = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (computer_name == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("ConnectRegistry", 1, "str or None", args[0]); + goto exit; + } + if (!clinic_HKEY_converter(args[1], &key)) { goto exit; } _return_value = winreg_ConnectRegistry_impl(module, computer_name, key); @@ -163,6 +182,11 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for computer_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)computer_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -199,8 +223,27 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *sub_key; HKEY _return_value; - if (!_PyArg_ParseStack(args, nargs, "O&Z:CreateKey", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("CreateKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + sub_key = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + 
#endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("CreateKey", 2, "str or None", args[1]); goto exit; } _return_value = winreg_CreateKey_impl(module, key, sub_key); @@ -210,6 +253,11 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -251,7 +299,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:CreateKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:CreateKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -259,7 +307,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_CreateKeyEx_impl(module, key, sub_key, reserved, access); @@ -269,6 +317,11 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -303,13 +356,32 @@ winreg_DeleteKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; - if (!_PyArg_ParseStack(args, nargs, "O&u:DeleteKey", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("DeleteKey", nargs, 2, 
2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("DeleteKey", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { goto exit; } return_value = winreg_DeleteKey_impl(module, key, sub_key); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -351,19 +423,24 @@ winreg_DeleteKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "access", "reserved", NULL}; - static _PyArg_Parser _parser = {"O&u|ii:DeleteKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:DeleteKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; REGSAM access = KEY_WOW64_64KEY; int reserved = 0; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &access, &reserved)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Converter, &sub_key, &access, &reserved)) { goto exit; } return_value = winreg_DeleteKeyEx_impl(module, key, sub_key, access, reserved); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -391,13 +468,37 @@ winreg_DeleteValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *value; - if (!_PyArg_ParseStack(args, nargs, "O&Z:DeleteValue", - clinic_HKEY_converter, &key, &value)) { + if (!_PyArg_CheckPositional("DeleteValue", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { 
+ value = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + value = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + value = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (value == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("DeleteValue", 2, "str or None", args[1]); goto exit; } return_value = winreg_DeleteValue_impl(module, key, value); exit: + /* Cleanup for value */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)value); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -527,12 +628,26 @@ winreg_ExpandEnvironmentStrings(PyObject *module, PyObject *arg) PyObject *return_value = NULL; const Py_UNICODE *string; - if (!PyArg_Parse(arg, "u:ExpandEnvironmentStrings", &string)) { + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("ExpandEnvironmentStrings", 0, "str", arg); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + string = _PyUnicode_AsUnicode(arg); + #else /* USE_UNICODE_WCHAR_CACHE */ + string = PyUnicode_AsWideCharString(arg, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (string == NULL) { goto exit; } return_value = winreg_ExpandEnvironmentStrings_impl(module, string); exit: + /* Cleanup for string */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)string); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -619,13 +734,48 @@ winreg_LoadKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *sub_key; const Py_UNICODE *file_name; - if (!_PyArg_ParseStack(args, nargs, "O&uu:LoadKey", - clinic_HKEY_converter, &key, &sub_key, &file_name)) { + if (!_PyArg_CheckPositional("LoadKey", nargs, 3, 3)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("LoadKey", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE 
*/ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + if (!PyUnicode_Check(args[2])) { + _PyArg_BadArgument("LoadKey", 3, "str", args[2]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + file_name = _PyUnicode_AsUnicode(args[2]); + #else /* USE_UNICODE_WCHAR_CACHE */ + file_name = PyUnicode_AsWideCharString(args[2], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (file_name == NULL) { goto exit; } return_value = winreg_LoadKey_impl(module, key, sub_key, file_name); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for file_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)file_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -660,7 +810,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:OpenKey", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:OpenKey", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -668,7 +818,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_OpenKey_impl(module, key, sub_key, reserved, access); @@ -678,6 +828,11 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + 
return return_value; } @@ -712,7 +867,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:OpenKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:OpenKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -720,7 +875,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_OpenKeyEx_impl(module, key, sub_key, reserved, access); @@ -730,6 +885,11 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -802,13 +962,37 @@ winreg_QueryValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; - if (!_PyArg_ParseStack(args, nargs, "O&Z:QueryValue", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("QueryValue", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + sub_key = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + sub_key = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("QueryValue", 2, "str or None", args[1]); goto exit; } 
return_value = winreg_QueryValue_impl(module, key, sub_key); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -841,13 +1025,37 @@ winreg_QueryValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *name; - if (!_PyArg_ParseStack(args, nargs, "O&Z:QueryValueEx", - clinic_HKEY_converter, &key, &name)) { + if (!_PyArg_CheckPositional("QueryValueEx", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + name = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + name = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + name = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (name == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("QueryValueEx", 2, "str or None", args[1]); goto exit; } return_value = winreg_QueryValueEx_impl(module, key, name); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -885,13 +1093,32 @@ winreg_SaveKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *file_name; - if (!_PyArg_ParseStack(args, nargs, "O&u:SaveKey", - clinic_HKEY_converter, &key, &file_name)) { + if (!_PyArg_CheckPositional("SaveKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("SaveKey", 2, "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + file_name = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + file_name = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (file_name == NULL) { goto exit; } return_value = winreg_SaveKey_impl(module, 
key, file_name); exit: + /* Cleanup for file_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)file_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -925,25 +1152,23 @@ PyDoc_STRVAR(winreg_SetValue__doc__, {"SetValue", (PyCFunction)(void(*)(void))winreg_SetValue, METH_FASTCALL, winreg_SetValue__doc__}, static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, - DWORD type, const Py_UNICODE *value, - Py_ssize_clean_t value_length); +winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, + DWORD type, PyObject *value_obj); static PyObject * winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; HKEY key; - const Py_UNICODE *sub_key; + PyObject *sub_key_obj; DWORD type; - const Py_UNICODE *value; - Py_ssize_clean_t value_length; + PyObject *value_obj; - if (!_PyArg_ParseStack(args, nargs, "O&Zku#:SetValue", - clinic_HKEY_converter, &key, &sub_key, &type, &value, &value_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&OkU:SetValue", + clinic_HKEY_converter, &key, &sub_key_obj, &type, &value_obj)) { goto exit; } - return_value = winreg_SetValue_impl(module, key, sub_key, type, value, value_length); + return_value = winreg_SetValue_impl(module, key, sub_key_obj, type, value_obj); exit: return return_value; @@ -1010,13 +1235,18 @@ winreg_SetValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) DWORD type; PyObject *value; - if (!_PyArg_ParseStack(args, nargs, "O&ZOkO:SetValueEx", - clinic_HKEY_converter, &key, &value_name, &reserved, &type, &value)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&OkO:SetValueEx", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &value_name, &reserved, &type, &value)) { goto exit; } return_value = winreg_SetValueEx_impl(module, key, value_name, reserved, type, value); exit: + /* Cleanup for value_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void 
*)value_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1121,4 +1351,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=1204d20c543b5b4a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d32916ff805bd452 input=a9049054013a1b77]*/ diff --git a/PC/winreg.c b/PC/winreg.c index 3a6ea3689fd12a..cc4c75a8f7526b 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -621,16 +621,22 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) for (j = 0; j < i; j++) { PyObject *t; - wchar_t *wstr; Py_ssize_t len; t = PyList_GET_ITEM(value, j); if (!PyUnicode_Check(t)) return FALSE; - wstr = PyUnicode_AsUnicodeAndSize(t, &len); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE + len = PyUnicode_GetSize(t); + if (len < 0) return FALSE; - size += Py_SAFE_DOWNCAST((len + 1) * sizeof(wchar_t), + len++; +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(t, NULL, 0); + if (len < 0) + return FALSE; +#endif /* USE_UNICODE_WCHAR_CACHE */ + size += Py_SAFE_DOWNCAST(len * sizeof(wchar_t), size_t, DWORD); } @@ -646,17 +652,18 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) for (j = 0; j < i; j++) { PyObject *t; - wchar_t *wstr; Py_ssize_t len; t = PyList_GET_ITEM(value, j); - wstr = PyUnicode_AsUnicodeAndSize(t, &len); - assert(wstr); - wcscpy(P, wstr); - P += (len + 1); + assert(size > 0); + len = PyUnicode_AsWideChar(t, P, size); + assert(len >= 0); + assert(len < size); + size -= (DWORD)len + 1; + P += len + 1; } /* And doubly-terminate the list... */ - *P = '\0'; + *P = L'\0'; break; } case REG_BINARY: @@ -1573,12 +1580,12 @@ winreg.SetValue key: HKEY An already open key, or any one of the predefined HKEY_* constants. - sub_key: Py_UNICODE(accept={str, NoneType}) + sub_key as sub_key_obj: object A string that names the subkey with which the value is associated. 
type: DWORD An integer that specifies the type of the data. Currently this must be REG_SZ, meaning only strings are supported. - value: Py_UNICODE(zeroes=True) + value as value_obj: unicode A string that specifies the new value. / @@ -1596,11 +1603,13 @@ KEY_SET_VALUE access. [clinic start generated code]*/ static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, - DWORD type, const Py_UNICODE *value, - Py_ssize_clean_t value_length) -/*[clinic end generated code: output=686bedb1cbb4367b input=2cd2adab79339c53]*/ +winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, + DWORD type, PyObject *value_obj) +/*[clinic end generated code: output=e1c0674b77ba195b input=525af06a623c9ad9]*/ { + wchar_t *sub_key = NULL; + wchar_t *value = NULL; + Py_ssize_t value_length; long rc; if (type != REG_SZ) { @@ -1609,9 +1618,41 @@ winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, return NULL; } +#if USE_UNICODE_WCHAR_CACHE + if (!_PyUnicode_UNICODE_Converter(sub_key_obj, &sub_key)) { + return NULL; + } + value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); + if (value == NULL) { + return NULL; + } +#else /* USE_UNICODE_WCHAR_CACHE */ + if (!_PyUnicode_WideCharString_Converter(sub_key_obj, &sub_key)) { + return NULL; + } + value = PyUnicode_AsWideCharString(value_obj, &value_length); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (value == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(sub_key); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return NULL; + } + if ((Py_ssize_t)(DWORD)value_length != value_length) { + PyErr_SetString(PyExc_OverflowError, "too long string"); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(sub_key); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return NULL; + } + Py_BEGIN_ALLOW_THREADS - rc = RegSetValueW(key, sub_key, REG_SZ, value, value_length+1); + rc = RegSetValueW(key, sub_key, REG_SZ, value, (DWORD)value_length+1); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + 
PyMem_Free(value); + PyMem_Free(sub_key); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegSetValue"); Py_RETURN_NONE; diff --git a/Python/dynload_win.c b/Python/dynload_win.c index 36918c3579d92d..050c561d3d8d3a 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -190,13 +190,16 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, { dl_funcptr p; char funcname[258], *import_python; - const wchar_t *wpathname; #ifndef _DEBUG _Py_CheckPython3(); #endif - wpathname = _PyUnicode_AsUnicode(pathname); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpathname == NULL) return NULL; @@ -225,6 +228,9 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, #if HAVE_SXS _Py_DeactivateActCtx(cookie); #endif +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpathname); +#endif /* USE_UNICODE_WCHAR_CACHE */ /* restore old error mode settings */ SetErrorMode(old_mode); diff --git a/Python/fileutils.c b/Python/fileutils.c index 75e015afaec32b..44961b08afbb9b 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1003,15 +1003,21 @@ _Py_stat(PyObject *path, struct stat *statbuf) #ifdef MS_WINDOWS int err; struct _stat wstatbuf; - const wchar_t *wpath; - wpath = _PyUnicode_AsUnicode(path); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpath = _PyUnicode_AsUnicode(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return -2; err = _wstat(wpath, &wstatbuf); if (!err) statbuf->st_mode = wstatbuf.st_mode; +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ return err; #else int ret; @@ -1395,7 +1401,6 @@ _Py_fopen_obj(PyObject *path, const char *mode) FILE 
*f; int async_err = 0; #ifdef MS_WINDOWS - const wchar_t *wpath; wchar_t wmode[10]; int usize; @@ -1407,7 +1412,11 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_TYPE(path)); return NULL; } - wpath = _PyUnicode_AsUnicode(path); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpath = _PyUnicode_AsUnicode(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return NULL; @@ -1424,6 +1433,9 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_END_ALLOW_THREADS } while (f == NULL && errno == EINTR && !(async_err = PyErr_CheckSignals())); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ #else PyObject *bytes; char *path_bytes; diff --git a/Python/getargs.c b/Python/getargs.c index 693a29cced4239..c01042c3e7771f 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1070,6 +1070,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { +#if HAVE_UNICODE_WCHAR_CACHE Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); if (*format == '#') { @@ -1108,6 +1109,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return converterr(c == 'Z' ? 
"str or None" : "str", arg, msgbuf, bufsize); } +#else /* HAVE_UNICODE_WCHAR_CACHE */ + PyErr_Format(PyExc_SystemError, + "unsupported PyArg_Parse format: \"%s\"", + format-1); + RETURN_ERR_OCCURRED; +#endif /* HAVE_UNICODE_WCHAR_CACHE */ break; } diff --git a/Python/traceback.c b/Python/traceback.c index bd1061ed43b1e1..709be52de7243d 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -674,7 +674,9 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; +#if USE_UNICODE_WCHAR_CACHE wchar_t *wstr = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -682,13 +684,16 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; +#if USE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { wstr = ((PyASCIIObject *)text)->wstr; if (wstr == NULL) return; size = ((PyCompactUnicodeObject *)text)->wstr_length; } - else if (ascii->state.compact) { + else +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (ascii->state.compact) { if (ascii->state.ascii) data = ((PyASCIIObject*)text) + 1; else @@ -709,10 +714,12 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { - if (kind != PyUnicode_WCHAR_KIND) - ch = PyUnicode_READ(kind, data, i); - else +#if USE_UNICODE_WCHAR_CACHE + if (kind == PyUnicode_WCHAR_KIND) ch = wstr[i]; + else +#endif /* USE_UNICODE_WCHAR_CACHE */ + ch = PyUnicode_READ(kind, data, i); if (' ' <= ch && ch <= 126) { /* printable ASCII character */ char c = (char)ch; diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index cb2ded4649dce2..2969b4b44431c8 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -3351,20 +3351,75 @@ def parse_arg(self, argname, argnum): """.format(argname=argname, paramname=self.name, argnum=argnum) return super().parse_arg(argname, argnum) +@add_legacy_c_converter('u') @add_legacy_c_converter('u#', zeroes=True) @add_legacy_c_converter('Z', accept={str, NoneType}) @add_legacy_c_converter('Z#', 
accept={str, NoneType}, zeroes=True) class Py_UNICODE_converter(CConverter): type = 'const Py_UNICODE *' default_type = (str, Null, NoneType) - format_unit = 'u' def converter_init(self, *, accept={str}, zeroes=False): format_unit = 'Z' if accept=={str, NoneType} else 'u' if zeroes: format_unit += '#' self.length = True - self.format_unit = format_unit + self.format_unit = format_unit + else: + self.accept = accept + if accept == {str}: + self.converter = '_PyUnicode_WideCharString_Converter' + elif accept == {str, NoneType}: + self.converter = '_PyUnicode_WideCharString_Opt_Converter' + else: + fail("Py_UNICODE_converter: illegal 'accept' argument " + repr(accept)) + + def cleanup(self): + if not self.length: + return """\ +#if !USE_UNICODE_WCHAR_CACHE +PyMem_Free((void *){name}); +#endif /* USE_UNICODE_WCHAR_CACHE */ +""".format(name=self.name) + + def parse_arg(self, argname, argnum): + if not self.length: + if self.accept == {str}: + return """ + if (!PyUnicode_Check({argname})) {{{{ + _PyArg_BadArgument("{{name}}", {argnum}, "str", {argname}); + goto exit; + }}}} + #if USE_UNICODE_WCHAR_CACHE + {paramname} = _PyUnicode_AsUnicode({argname}); + #else /* USE_UNICODE_WCHAR_CACHE */ + {paramname} = PyUnicode_AsWideCharString({argname}, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if ({paramname} == NULL) {{{{ + goto exit; + }}}} + """.format(argname=argname, paramname=self.name, argnum=argnum) + elif self.accept == {str, NoneType}: + return """ + if ({argname} == Py_None) {{{{ + {paramname} = NULL; + }}}} + else if (PyUnicode_Check({argname})) {{{{ + #if USE_UNICODE_WCHAR_CACHE + {paramname} = _PyUnicode_AsUnicode({argname}); + #else /* USE_UNICODE_WCHAR_CACHE */ + {paramname} = PyUnicode_AsWideCharString({argname}, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if ({paramname} == NULL) {{{{ + goto exit; + }}}} + }}}} + else {{{{ + _PyArg_BadArgument("{{name}}", {argnum}, "str or None", {argname}); + goto exit; + }}}} + """.format(argname=argname, 
paramname=self.name, argnum=argnum) + return super().parse_arg(argname, argnum) @add_legacy_c_converter('s*', accept={str, buffer}) @add_legacy_c_converter('z*', accept={str, buffer, NoneType}) From 60b89c9a7425ab99919d5f7ce9b71397ebe4c653 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Mar 2019 11:48:43 +0200 Subject: [PATCH 02/17] Fix winreg.SetValue(). --- PC/clinic/winreg.c.h | 17 +++++++++++------ PC/winreg.c | 27 ++++++--------------------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/PC/clinic/winreg.c.h b/PC/clinic/winreg.c.h index 666108e481d292..cb7a4564713e55 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -1152,7 +1152,7 @@ PyDoc_STRVAR(winreg_SetValue__doc__, {"SetValue", (PyCFunction)(void(*)(void))winreg_SetValue, METH_FASTCALL, winreg_SetValue__doc__}, static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, +winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, DWORD type, PyObject *value_obj); static PyObject * @@ -1160,17 +1160,22 @@ winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; HKEY key; - PyObject *sub_key_obj; + const Py_UNICODE *sub_key; DWORD type; PyObject *value_obj; - if (!_PyArg_ParseStack(args, nargs, "O&OkU:SetValue", - clinic_HKEY_converter, &key, &sub_key_obj, &type, &value_obj)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&kU:SetValue", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &type, &value_obj)) { goto exit; } - return_value = winreg_SetValue_impl(module, key, sub_key_obj, type, value_obj); + return_value = winreg_SetValue_impl(module, key, sub_key, type, value_obj); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1351,4 +1356,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return 
return_value; } -/*[clinic end generated code: output=d32916ff805bd452 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d710dde7327c59e7 input=a9049054013a1b77]*/ diff --git a/PC/winreg.c b/PC/winreg.c index cc4c75a8f7526b..20d298b37e22f8 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -1580,7 +1580,7 @@ winreg.SetValue key: HKEY An already open key, or any one of the predefined HKEY_* constants. - sub_key as sub_key_obj: object + sub_key: Py_UNICODE(accept={str, NoneType}) A string that names the subkey with which the value is associated. type: DWORD An integer that specifies the type of the data. Currently this must @@ -1603,12 +1603,10 @@ KEY_SET_VALUE access. [clinic start generated code]*/ static PyObject * -winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, +winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, DWORD type, PyObject *value_obj) -/*[clinic end generated code: output=e1c0674b77ba195b input=525af06a623c9ad9]*/ +/*[clinic end generated code: output=d4773dc9c372311a input=bf088494ae2d24fd]*/ { - wchar_t *sub_key = NULL; - wchar_t *value = NULL; Py_ssize_t value_length; long rc; @@ -1619,29 +1617,17 @@ winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, } #if USE_UNICODE_WCHAR_CACHE - if (!_PyUnicode_UNICODE_Converter(sub_key_obj, &sub_key)) { - return NULL; - } - value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); - if (value == NULL) { - return NULL; - } + const wchar_t *value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); #else /* USE_UNICODE_WCHAR_CACHE */ - if (!_PyUnicode_WideCharString_Converter(sub_key_obj, &sub_key)) { - return NULL; - } - value = PyUnicode_AsWideCharString(value_obj, &value_length); + wchar_t *value = PyUnicode_AsWideCharString(value_obj, &value_length); #endif /* USE_UNICODE_WCHAR_CACHE */ if (value == NULL) { -#if !USE_UNICODE_WCHAR_CACHE - PyMem_Free(sub_key); -#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } if 
((Py_ssize_t)(DWORD)value_length != value_length) { PyErr_SetString(PyExc_OverflowError, "too long string"); #if !USE_UNICODE_WCHAR_CACHE - PyMem_Free(sub_key); + PyMem_Free(value); #endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } @@ -1651,7 +1637,6 @@ winreg_SetValue_impl(PyObject *module, HKEY key, PyObject *sub_key_obj, Py_END_ALLOW_THREADS #if !USE_UNICODE_WCHAR_CACHE PyMem_Free(value); - PyMem_Free(sub_key); #endif /* USE_UNICODE_WCHAR_CACHE */ if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegSetValue"); From f386b63e143e0b2d507957695dc2f3a2d9054bc6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Mar 2019 12:02:51 +0200 Subject: [PATCH 03/17] Clean up some ifdefs in _testcapimodule. --- Modules/_testcapimodule.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index ddbc3dc80e1a19..b8b0392a3dc155 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1410,7 +1410,6 @@ getargs_y_hash(PyObject *self, PyObject *args) return PyBytes_FromStringAndSize(str, size); } -//#if USE_UNICODE_WCHAR_CACHE static PyObject * getargs_u(PyObject *self, PyObject *args) { @@ -1454,7 +1453,6 @@ getargs_Z_hash(PyObject *self, PyObject *args) else Py_RETURN_NONE; } -// #endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_es(PyObject *self, PyObject *args) @@ -4817,12 +4815,10 @@ static PyMethodDef TestMethods[] = { {"getargs_y", getargs_y, METH_VARARGS}, {"getargs_y_star", getargs_y_star, METH_VARARGS}, {"getargs_y_hash", getargs_y_hash, METH_VARARGS}, -// #if USE_UNICODE_WCHAR_CACHE {"getargs_u", getargs_u, METH_VARARGS}, {"getargs_u_hash", getargs_u_hash, METH_VARARGS}, {"getargs_Z", getargs_Z, METH_VARARGS}, {"getargs_Z_hash", getargs_Z_hash, METH_VARARGS}, -// #endif /* USE_UNICODE_WCHAR_CACHE */ {"getargs_w_star", getargs_w_star, METH_VARARGS}, {"getargs_es", getargs_es, METH_VARARGS}, {"getargs_et", getargs_et, METH_VARARGS}, @@ -4846,8 +4842,6 
@@ static PyMethodDef TestMethods[] = { #if USE_UNICODE_WCHAR_CACHE {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, -#endif /* USE_UNICODE_WCHAR_CACHE */ -#if USE_UNICODE_WCHAR_CACHE {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, #endif /* USE_UNICODE_WCHAR_CACHE */ {"_test_thread_state", test_thread_state, METH_VARARGS}, From 11e0e0c7baa1b6ae8d2815d5767f7c296c4e7f7d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 19 Mar 2019 12:17:39 +0200 Subject: [PATCH 04/17] Make path_cleanup() paranoidally safer. --- Modules/posixmodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f225d70f91cbaa..0bde61a357e273 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -901,8 +901,9 @@ static void path_cleanup(path_t *path) { #if !USE_UNICODE_WCHAR_CACHE - PyMem_Free((wchar_t *)path->wide); + wchar_t *wide = (wchar_t *)path->wide; path->wide = NULL; + PyMem_Free(wide); #endif /* USE_UNICODE_WCHAR_CACHE */ Py_CLEAR(path->object); Py_CLEAR(path->cleanup); From 8750d48b6de939a4ece79037519c21196c0a35ed Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 20 Mar 2019 21:37:04 +0200 Subject: [PATCH 05/17] Fix os.scandir(). --- Modules/posixmodule.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 0bde61a357e273..b36a7a7f2ef1a3 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -12236,11 +12236,7 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) result = fstatat(self->dir_fd, path, &st, follow_symlinks ? 
0 : AT_SYMLINK_NOFOLLOW); #else -#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE - PyMem_Free(path); -#else /* USE_UNICODE_WCHAR_CACHE */ Py_DECREF(ub); -#endif /* USE_UNICODE_WCHAR_CACHE */ PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); return NULL; #endif /* HAVE_FSTATAT */ @@ -13053,9 +13049,9 @@ os_scandir_impl(PyObject *module, path_t *path) #else /* POSIX */ errno = 0; #ifdef HAVE_FDOPENDIR - if (path->fd != -1) { + if (iterator->path.fd != -1) { /* closedir() closes the FD, so we duplicate it */ - fd = _Py_dup(path->fd); + fd = _Py_dup(iterator->path.fd); if (fd == -1) goto error; From 3b5294a2514218c8288ae4152acba1323aa111d7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 09:25:51 +0300 Subject: [PATCH 06/17] Silence compiler warnings. --- Modules/arraymodule.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 3a978d1ace14c2..191d23346be369 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1692,10 +1692,13 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *arg) } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS len = PyUnicode_GetSize(arg); if (len < 0) { return NULL; } +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ len = PyUnicode_AsWideChar(arg, NULL, 0); if (len < 0) { @@ -2695,6 +2698,8 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) else if (initial != NULL && PyUnicode_Check(initial)) { Py_ssize_t n; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Py_UNICODE *ustr; ustr = PyUnicode_AsUnicode(initial); @@ -2719,6 +2724,7 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) memcpy(item, ustr, n); self->allocated = Py_SIZE(self); } +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ n = PyUnicode_AsWideChar(initial, NULL, 0); if (n < 0) { From e7898fa7d4c41c452bb283f1e41338dc18c3206d Mon Sep 17 00:00:00 2001 From: Serhiy 
Storchaka Date: Tue, 30 Jun 2020 09:43:47 +0300 Subject: [PATCH 07/17] Fix PyUnicode_IsIdentifier for the cache-less build. --- Lib/test/test_unicode.py | 2 ++ Objects/unicodeobject.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index cf27d6750b8534..e47a416d5370f7 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -721,6 +721,7 @@ def test_isidentifier(self): self.assertFalse("0".isidentifier()) @support.cpython_only + @support.requires_legacy_unicode_capi def test_isidentifier_legacy(self): import _testcapi u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊' @@ -2346,6 +2347,7 @@ def test_getnewargs(self): self.assertEqual(len(args), 1) @support.cpython_only + @support.requires_legacy_unicode_capi def test_resize(self): from _testcapi import getargs_u for length in range(1, 100, 7): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bcc75ef6885ff4..6a2ad437e87bd3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12549,11 +12549,14 @@ _PyUnicode_ScanIdentifier(PyObject *self) int PyUnicode_IsIdentifier(PyObject *self) { +#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_IS_READY(self)) { +#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_ssize_t i = _PyUnicode_ScanIdentifier(self); Py_ssize_t len = PyUnicode_GET_LENGTH(self); /* an empty string is not a valid identifier */ return len && i == len; +#if HAVE_UNICODE_WCHAR_CACHE } else { _Py_COMP_DIAG_PUSH @@ -12597,6 +12600,7 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS return 1; _Py_COMP_DIAG_POP } +#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /*[clinic input] From fd641c69a44883fc54a9bae1c474d949806392f2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 10:27:42 +0300 Subject: [PATCH 08/17] Silence compiler warnings on Windows. 
--- Modules/_ctypes/callproc.c | 3 +++ Modules/_io/fileio.c | 3 +++ Modules/overlapped.c | 11 ++++++++++- Modules/posixmodule.c | 12 ++++++++++++ Objects/unicodeobject.c | 6 ++++++ PC/_msi.c | 6 ++++++ PC/winreg.c | 6 ++++++ Python/dynload_win.c | 3 +++ Python/fileutils.c | 6 ++++++ 9 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index ce1d41c9079088..b4a493ba47d1fd 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1309,7 +1309,10 @@ static PyObject *load_library(PyObject *self, PyObject *args) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const WCHAR *name = _PyUnicode_AsUnicode(nameobj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index e44e4f30f805c3..b9856b3b631657 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -271,7 +271,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, return -1; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS widename = PyUnicode_AsUnicode(stringobj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ widename = PyUnicode_AsWideCharString(stringobj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index e7f72b3e070004..95bdfbfdff0393 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -386,7 +386,10 @@ overlapped_CreateEvent(PyObject *self, PyObject *args) } else if (PyUnicode_Check(Name_obj)) { #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Name = (wchar_t *)_PyUnicode_AsUnicode(Name_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ Name = PyUnicode_AsWideCharString(Name_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -395,7 +398,7 @@ 
overlapped_CreateEvent(PyObject *self, PyObject *args) } } else { - _PyArg_BadArgument("CreateEvent", 4, "str or None", Name_obj); + _PyArg_BadArgument("CreateEvent", "argument 4", "str or None", Name_obj); return NULL; } @@ -1256,7 +1259,10 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) return -1; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ Host = PyUnicode_AsWideCharString(Host_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -1284,7 +1290,10 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) return -1; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ Host = PyUnicode_AsWideCharString(Host_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index e83c3477719768..efd99544f5a997 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1081,7 +1081,10 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_AsUnicodeAndSize(o, &length); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wide = PyUnicode_AsWideCharString(o, &length); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -1179,7 +1182,10 @@ path_converter(PyObject *o, void *p) } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_AsUnicodeAndSize(wo, &length); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wide = PyUnicode_AsWideCharString(wo, &length); Py_DECREF(wo); @@ -12850,7 +12856,10 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) if (!PyUnicode_FSDecoder(self->path, &ub)) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH 
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *path = PyUnicode_AsUnicode(ub); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *path = PyUnicode_AsWideCharString(ub, NULL); Py_DECREF(ub); @@ -13076,9 +13085,12 @@ os_DirEntry_inode_impl(DirEntry *self) if (!PyUnicode_FSDecoder(self->path, &unicode)) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *path = PyUnicode_AsUnicode(unicode); result = LSTAT(path, &stat); Py_DECREF(unicode); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *path = PyUnicode_AsWideCharString(unicode, NULL); Py_DECREF(unicode); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 6a2ad437e87bd3..33d359f08788ce 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4468,9 +4468,12 @@ unicode_decode_call_errorhandler_wchar( } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS repwlen = PyUnicode_GetSize(repunicode); if (repwlen < 0) goto onError; +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0); if (repwlen < 0) @@ -7811,11 +7814,14 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, if (substring == NULL) return -1; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS p = PyUnicode_AsUnicodeAndSize(substring, &size); if (p == NULL) { Py_DECREF(substring); return -1; } +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ p = PyUnicode_AsWideCharString(substring, &size); Py_CLEAR(substring); diff --git a/PC/_msi.c b/PC/_msi.c index 5f105ed899f25d..3444c9be849c5d 100644 --- a/PC/_msi.c +++ b/PC/_msi.c @@ -428,7 +428,10 @@ record_setstring(msiobj* record, PyObject *args) return NULL; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS data = (wchar_t *)_PyUnicode_AsUnicode(data_obj); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ data = 
PyUnicode_AsWideCharString(data_obj, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -636,7 +639,10 @@ summary_setproperty(msiobj* si, PyObject *args) if (PyUnicode_Check(data)) { #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const WCHAR *value = _PyUnicode_AsUnicode(data); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ WCHAR *value = PyUnicode_AsWideCharString(data, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/PC/winreg.c b/PC/winreg.c index 7e9b6d08bfeea4..2581a4694d4156 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -646,10 +646,13 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) if (!PyUnicode_Check(t)) return FALSE; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS len = PyUnicode_GetSize(t); if (len < 0) return FALSE; len++; +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ len = PyUnicode_AsWideChar(t, NULL, 0); if (len < 0) @@ -1707,7 +1710,10 @@ winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *value = PyUnicode_AsWideCharString(value_obj, &value_length); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Python/dynload_win.c b/Python/dynload_win.c index a027c822f188f4..fcd9f6dcbe67ec 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -172,7 +172,10 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, #endif #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ diff --git a/Python/fileutils.c b/Python/fileutils.c index 
9b60f6eca412e2..be2dd72073f89a 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1010,7 +1010,10 @@ _Py_stat(PyObject *path, struct stat *statbuf) struct _stat wstatbuf; #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *wpath = _PyUnicode_AsUnicode(path); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ @@ -1454,7 +1457,10 @@ _Py_fopen_obj(PyObject *path, const char *mode) return NULL; } #if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *wpath = _PyUnicode_AsUnicode(path); +_Py_COMP_DIAG_POP #else /* USE_UNICODE_WCHAR_CACHE */ wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); #endif /* USE_UNICODE_WCHAR_CACHE */ From ed14aa950b07c02eced6b5499e203ca881d5631b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 10:40:01 +0300 Subject: [PATCH 09/17] Fix compiler warning in _testcapi. 
--- Modules/_testcapimodule.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3bd51983112167..76c22f5f099b37 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1827,6 +1827,10 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); +#if USE_UNICODE_WCHAR_CACHE +/* Ignore use of deprecated APIs */ +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_FromUnicode(invalid, 1); if (wide == NULL) PyErr_Clear(); @@ -1834,10 +1838,6 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromUnicode(L\"\\U00110000\", 1) didn't fail"); -#if USE_UNICODE_WCHAR_CACHE -/* Ignore use of deprecated APIs */ -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_FromUnicode(NULL, 1); if (wide == NULL) return NULL; From 97b5228a01dbfd995a2b815b184ff2f0962b1928 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 5 Jul 2020 20:23:03 +0300 Subject: [PATCH 10/17] Use HAVE_UNICODE_WCHAR_CACHE instead of USE_UNICODE_WCHAR_CACHE in _Py_DumpASCII(). 
--- Python/traceback.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/traceback.c b/Python/traceback.c index 1fbb86bd168de5..e183782400556e 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -684,9 +684,9 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; -#if USE_UNICODE_WCHAR_CACHE +#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr = NULL; -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -694,7 +694,7 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; -#if USE_UNICODE_WCHAR_CACHE +#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { wstr = ((PyASCIIObject *)text)->wstr; if (wstr == NULL) @@ -702,7 +702,7 @@ _Py_DumpASCII(int fd, PyObject *text) size = ((PyCompactUnicodeObject *)text)->wstr_length; } else -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (ascii->state.compact) { if (ascii->state.ascii) data = ((PyASCIIObject*)text) + 1; @@ -724,11 +724,11 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { -#if USE_UNICODE_WCHAR_CACHE +#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) ch = wstr[i]; else -#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif /* HAVE_UNICODE_WCHAR_CACHE */ ch = PyUnicode_READ(kind, data, i); if (' ' <= ch && ch <= 126) { /* printable ASCII character */ From d6ba6b7511173ddd75eed88b3cab5ec28898f894 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 9 Jul 2020 11:03:28 +0300 Subject: [PATCH 11/17] Set HAVE_UNICODE_WCHAR_CACHE and USE_UNICODE_WCHAR_CACHE only if they were not set. 
--- Include/cpython/unicodeobject.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 21b87678215e3d..5ac89a1e35e5d9 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,8 +11,13 @@ /* --- Internal Unicode Operations ---------------------------------------- */ -#define HAVE_UNICODE_WCHAR_CACHE 1 -#define USE_UNICODE_WCHAR_CACHE 1 +#ifndef HAVE_UNICODE_WCHAR_CACHE +# define HAVE_UNICODE_WCHAR_CACHE 1 +#endif /* HAVE_UNICODE_WCHAR_CACHE */ + +#ifndef USE_UNICODE_WCHAR_CACHE +# define USE_UNICODE_WCHAR_CACHE HAVE_UNICODE_WCHAR_CACHE +#endif /* USE_UNICODE_WCHAR_CACHE */ /* Since splitting on whitespace is an important use case, and whitespace in most situations is solely ASCII whitespace, we From 0da41468ff7db9dc893b75a4c463f9cb0ca3188a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 9 Jul 2020 11:06:12 +0300 Subject: [PATCH 12/17] Remove Py_UNICODE_MATCH. --- Include/cpython/unicodeobject.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 5ac89a1e35e5d9..19571c302b1341 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -81,15 +81,6 @@ Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) { #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) #if HAVE_UNICODE_WCHAR_CACHE -/* Check if substring matches at given offset. The offset must be - valid, and the substring must not be empty. 
*/ - -#define Py_UNICODE_MATCH(string, offset, substring) \ - ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ - ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ - !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - /* --- Unicode Type ------------------------------------------------------- */ /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject From e42590850e33e8af4541a547522166ef96c36e5e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 09:52:31 +0300 Subject: [PATCH 13/17] Fix unterminated #if. --- Include/cpython/unicodeobject.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 19571c302b1341..95ccef38086a8c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -80,7 +80,6 @@ Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) { /* low surrogate = bottom 10 bits added to DC00 */ #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) -#if HAVE_UNICODE_WCHAR_CACHE /* --- Unicode Type ------------------------------------------------------- */ /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject From 8a9259b90d24a1e8bb1c7d92b701001e1c8fc94f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 10:10:09 +0300 Subject: [PATCH 14/17] Reset arraymodule.c. 
--- Modules/arraymodule.c | 126 +++++++++------------------------ Modules/clinic/arraymodule.c.h | 20 +++--- 2 files changed, 42 insertions(+), 104 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 6e2c3717c28253..2ba2ff43aa8b8a 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -241,40 +241,29 @@ u_getitem(arrayobject *ap, Py_ssize_t i) static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { -#if USE_UNICODE_WCHAR_CACHE - Py_UNICODE *p; - Py_ssize_t len; - - if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) + PyObject *u; + if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) { return -1; - if (len != 1) { + } + + Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0); + if (len != 2) { PyErr_SetString(PyExc_TypeError, "array item must be unicode character"); return -1; } - if (i >= 0) - ((Py_UNICODE *)ap->ob_item)[i] = p[0]; - return 0; -#else - PyObject *u; - Py_ssize_t len; - if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) - return -1; - len = PyUnicode_AsWideChar(u, NULL, 0); - if (len != 2) { - if (len >= 0) { - PyErr_SetString(PyExc_TypeError, - "array item must be unicode character"); - } - return -1; + wchar_t w; + len = PyUnicode_AsWideChar(u, &w, 1); + assert(len == 1); + + if (i >= 0) { + ((wchar_t *)ap->ob_item)[i] = w; } - if (i >= 0) - PyUnicode_AsWideChar(u, &((wchar_t *)ap->ob_item)[i], 1); return 0; -#endif /* USE_UNICODE_WCHAR_CACHE */ } + static PyObject * h_getitem(arrayobject *ap, Py_ssize_t i) { @@ -1667,7 +1656,7 @@ array_array_tobytes_impl(arrayobject *self) /*[clinic input] array.array.fromunicode - arg: unicode + ustr: unicode / Extends this array with data from the unicode string ustr. @@ -1678,41 +1667,28 @@ some other type. 
[clinic start generated code]*/ static PyObject * -array_array_fromunicode_impl(arrayobject *self, PyObject *arg) -/*[clinic end generated code: output=b691324745f39114 input=bc9c8ea0d901b328]*/ +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) +/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/ { - Py_ssize_t len; - - char typecode = self->ob_descr->typecode; - if (typecode != 'u') { + if (self->ob_descr->typecode != 'u') { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - len = PyUnicode_GetSize(arg); - if (len < 0) { - return NULL; - } -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ - len = PyUnicode_AsWideChar(arg, NULL, 0); - if (len < 0) { - return NULL; - } - assert(len > 0); - len--; -#endif /* USE_UNICODE_WCHAR_CACHE */ - - if (len > 0) { + Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); + assert(ustr_length > 0); + if (ustr_length > 1) { + ustr_length--; /* trim trailing NUL character */ Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + len) == -1) + if (array_resize(self, old_size + ustr_length) == -1) { return NULL; - PyUnicode_AsWideChar(arg, (wchar_t *)self->ob_item + old_size, len); + } + + // must not fail + PyUnicode_AsWideChar( + ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); } Py_RETURN_NONE; @@ -2697,58 +2673,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } else if (initial != NULL && PyUnicode_Check(initial)) { Py_ssize_t n; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - Py_UNICODE *ustr; - - ustr = PyUnicode_AsUnicode(initial); + wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n); if (ustr == NULL) { - PyErr_NoMemory(); Py_DECREF(a); return NULL; } - n = PyUnicode_GET_DATA_SIZE(initial); - if (n > 0) { - arrayobject *self = (arrayobject 
*)a; - char *item = self->ob_item; - item = (char *)PyMem_Realloc(item, n); - if (item == NULL) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; - } - self->ob_item = item; - Py_SET_SIZE(self, n / sizeof(Py_UNICODE)); - memcpy(item, ustr, n); - self->allocated = Py_SIZE(self); - } -_Py_COMP_DIAG_POP -#else /* USE_UNICODE_WCHAR_CACHE */ - n = PyUnicode_AsWideChar(initial, NULL, 0); - if (n < 0) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; - } - assert(n > 0); - n--; if (n > 0) { arrayobject *self = (arrayobject *)a; - char *item = self->ob_item; - item = (char *)PyMem_Realloc(item, n * sizeof(wchar_t)); - if (item == NULL) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; - } - self->ob_item = item; + // self->ob_item may be NULL but it is safe. + PyMem_Free(self->ob_item); + self->ob_item = (char *)ustr; Py_SET_SIZE(self, n); - PyUnicode_AsWideChar(initial, (wchar_t*)item, n); - self->allocated = Py_SIZE(self); + self->allocated = n; } -#endif /* USE_UNICODE_WCHAR_CACHE */ } else if (initial != NULL && array_Check(initial) && len > 0) { arrayobject *self = (arrayobject *)a; diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index 7a1dfaf3b8aeda..300cd1397101e8 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -352,7 +352,7 @@ array_array_tobytes(arrayobject *self, PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(array_array_fromunicode__doc__, -"fromunicode($self, arg, /)\n" +"fromunicode($self, ustr, /)\n" "--\n" "\n" "Extends this array with data from the unicode string ustr.\n" @@ -365,23 +365,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__, {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__}, static PyObject * -array_array_fromunicode_impl(arrayobject *self, PyObject *arg); +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr); static PyObject * -array_array_fromunicode(arrayobject *self, PyObject *arg_) 
+array_array_fromunicode(arrayobject *self, PyObject *arg) { PyObject *return_value = NULL; - PyObject *arg; + PyObject *ustr; - if (!PyUnicode_Check(arg_)) { - _PyArg_BadArgument("fromunicode", "argument", "str", arg_); + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("fromunicode", "argument", "str", arg); goto exit; } - if (PyUnicode_READY(arg_) == -1) { + if (PyUnicode_READY(arg) == -1) { goto exit; } - arg = arg_; - return_value = array_array_fromunicode_impl(self, arg); + ustr = arg; + return_value = array_array_fromunicode_impl(self, ustr); exit: return return_value; @@ -514,4 +514,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=c5fbfe7c7b4ff2a8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=91c1cded65a1285f input=a9049054013a1b77]*/ From 16ac7fd44112ba67b035d5b29aab9316ec63c6f4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 11:20:06 +0300 Subject: [PATCH 15/17] Temporarily disable the wchar_t cache by default. --- Include/cpython/unicodeobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 95ccef38086a8c..8f39b2d1f7c39c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -12,7 +12,7 @@ /* --- Internal Unicode Operations ---------------------------------------- */ #ifndef HAVE_UNICODE_WCHAR_CACHE -# define HAVE_UNICODE_WCHAR_CACHE 1 +# define HAVE_UNICODE_WCHAR_CACHE 0 #endif /* HAVE_UNICODE_WCHAR_CACHE */ #ifndef USE_UNICODE_WCHAR_CACHE From 5950b5caaa45fd8d484129d7da540b6e07211537 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 14:57:43 +0300 Subject: [PATCH 16/17] Remove HAVE_UNICODE_WCHAR_CACHE.
--- Include/cpython/unicodeobject.h | 14 +- Objects/unicodeobject.c | 244 +++++++++++--------------------- Python/getargs.c | 9 +- Python/traceback.c | 15 +- 4 files changed, 86 insertions(+), 196 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 8f39b2d1f7c39c..615b4a971d5f47 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,12 +11,8 @@ /* --- Internal Unicode Operations ---------------------------------------- */ -#ifndef HAVE_UNICODE_WCHAR_CACHE -# define HAVE_UNICODE_WCHAR_CACHE 0 -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - #ifndef USE_UNICODE_WCHAR_CACHE -# define USE_UNICODE_WCHAR_CACHE HAVE_UNICODE_WCHAR_CACHE +# define USE_UNICODE_WCHAR_CACHE 1 #endif /* USE_UNICODE_WCHAR_CACHE */ /* Since splitting on whitespace is an important use case, and @@ -219,9 +215,7 @@ typedef struct { 4 bytes (see issue #19537 on m68k). */ unsigned int :24; } state; -#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr; /* wchar_t representation (null-terminated) */ -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the @@ -232,10 +226,8 @@ typedef struct { Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the * terminating \0. */ char *utf8; /* UTF-8 representation (null-terminated) */ -#if HAVE_UNICODE_WCHAR_CACHE Py_ssize_t wstr_length; /* Number of code points in wstr, possible * surrogates count as two code points. */ -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } PyCompactUnicodeObject; /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the @@ -256,7 +248,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( int check_content); /* Fast access macros */ -#if HAVE_UNICODE_WCHAR_CACHE /* Returns the deprecated Py_UNICODE representation's size in code units (this includes surrogate pairs as 2 units). 
@@ -291,7 +282,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( #define PyUnicode_AS_DATA(op) \ ((const char *)(PyUnicode_AS_UNICODE(op))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ @@ -452,7 +442,6 @@ enum PyUnicode_Kind { (0xffffU) : \ (0x10ffffU))))) -#if HAVE_UNICODE_WCHAR_CACHE Py_DEPRECATED(3.3) static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) { return PyUnicode_IS_COMPACT_ASCII(op) ? @@ -460,7 +449,6 @@ static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) { ((PyCompactUnicodeObject*)op)->wstr_length; } #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* === Public API ========================================================= */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e5f17248695c92..b6b56c4f97fa9c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -117,7 +117,6 @@ extern "C" { PyUnicode_IS_COMPACT_ASCII(op) ? 
\ ((PyASCIIObject*)(op))->length : \ _PyUnicode_UTF8_LENGTH(op)) -#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_WSTR(op) \ (((PyASCIIObject*)(op))->wstr) @@ -129,7 +128,6 @@ extern "C" { ((PyCompactUnicodeObject*)op)->wstr_length) #define _PyUnicode_WSTR_LENGTH(op) \ (((PyCompactUnicodeObject*)(op))->wstr_length) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ #define _PyUnicode_LENGTH(op) \ (((PyASCIIObject *)(op))->length) #define _PyUnicode_STATE(op) \ @@ -156,11 +154,9 @@ extern "C" { (assert(_PyUnicode_CHECK(op)), \ assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) -#if HAVE_UNICODE_WCHAR_CACHE #define _PyUnicode_SHARE_WSTR(op) \ (assert(_PyUnicode_CHECK(op)), \ (_PyUnicode_WSTR(unicode) == PyUnicode_DATA(op))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ @@ -169,14 +165,12 @@ extern "C" { && _PyUnicode_UTF8(op) \ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) -#if HAVE_UNICODE_WCHAR_CACHE /* true if the Unicode object has an allocated wstr memory block (not shared with other data) */ #define _PyUnicode_HAS_WSTR_MEMORY(op) \ ((_PyUnicode_WSTR(op) && \ (!PyUnicode_IS_READY(op) || \ _PyUnicode_WSTR(op) != PyUnicode_DATA(op)))) -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Generic helper macro to convert characters of different types. 
from_type and to_type have to be valid type names, begin and end @@ -539,7 +533,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) PyUnicodeObject *unicode = (PyUnicodeObject *)op; data = unicode->data.any; -#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { CHECK(ascii->length == 0); CHECK(ascii->hash == -1); @@ -551,9 +544,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(data == NULL); CHECK(compact->utf8 == NULL); } - else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - { + else { CHECK(kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND || kind == PyUnicode_4BYTE_KIND); @@ -568,7 +559,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(compact->utf8 != data); } } -#if HAVE_UNICODE_WCHAR_CACHE if (kind != PyUnicode_WCHAR_KIND) { if ( #if SIZEOF_WCHAR_T == 2 @@ -583,14 +573,11 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) } else CHECK(ascii->wstr != data); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (compact->utf8 == NULL) CHECK(compact->utf8_length == 0); -#if HAVE_UNICODE_WCHAR_CACHE if (ascii->wstr == NULL) CHECK(compact->wstr_length == 0); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /* check that the best kind is used: O(n) operation */ @@ -978,7 +965,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find_max_char.h" #include "stringlib/undef.h" -#if HAVE_UNICODE_WCHAR_CACHE _Py_COMP_DIAG_PUSH _Py_COMP_DIAG_IGNORE_DEPR_DECLS #include "stringlib/unicodedefs.h" @@ -987,8 +973,8 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS #include "stringlib/find.h" #include "stringlib/undef.h" _Py_COMP_DIAG_POP + #undef STRINGLIB_GET_EMPTY -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* --- Unicode Object ----------------------------------------------------- */ @@ -1047,6 +1033,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) Py_ssize_t char_size; Py_ssize_t struct_size; Py_ssize_t new_size; + int share_wstr; PyObject *new_unicode; #ifdef Py_DEBUG Py_ssize_t old_length = 
_PyUnicode_LENGTH(unicode); @@ -1061,9 +1048,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) struct_size = sizeof(PyASCIIObject); else struct_size = sizeof(PyCompactUnicodeObject); -#if HAVE_UNICODE_WCHAR_CACHE - int share_wstr = _PyUnicode_SHARE_WSTR(unicode); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + share_wstr = _PyUnicode_SHARE_WSTR(unicode); if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); @@ -1093,7 +1078,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) _Py_NewReference(unicode); _PyUnicode_LENGTH(unicode) = length; -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR(unicode) = PyUnicode_DATA(unicode); if (!PyUnicode_IS_ASCII(unicode)) @@ -1105,7 +1089,6 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = 0; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif @@ -1118,13 +1101,14 @@ resize_compact(PyObject *unicode, Py_ssize_t length) static int resize_inplace(PyObject *unicode, Py_ssize_t length) { + wchar_t *wstr; Py_ssize_t new_size; assert(!PyUnicode_IS_COMPACT(unicode)); assert(Py_REFCNT(unicode) == 1); if (PyUnicode_IS_READY(unicode)) { Py_ssize_t char_size; - int share_utf8; + int share_wstr, share_utf8; void *data; #ifdef Py_DEBUG Py_ssize_t old_length = _PyUnicode_LENGTH(unicode); @@ -1132,9 +1116,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) data = _PyUnicode_DATA_ANY(unicode); char_size = PyUnicode_KIND(unicode); -#if HAVE_UNICODE_WCHAR_CACHE - int share_wstr = _PyUnicode_SHARE_WSTR(unicode); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + share_wstr = _PyUnicode_SHARE_WSTR(unicode); share_utf8 = _PyUnicode_SHARE_UTF8(unicode); if (length > (PY_SSIZE_T_MAX / char_size - 1)) { @@ -1156,12 +1138,10 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } _PyUnicode_DATA_ANY(unicode) = data; -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { 
_PyUnicode_WSTR(unicode) = data; _PyUnicode_WSTR_LENGTH(unicode) = length; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (share_utf8) { _PyUnicode_UTF8(unicode) = data; _PyUnicode_UTF8_LENGTH(unicode) = length; @@ -1171,16 +1151,11 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) { -#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; -#if HAVE_UNICODE_WCHAR_CACHE } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } -#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ @@ -1189,7 +1164,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) return -1; } new_size = sizeof(wchar_t) * (length + 1); - wchar_t *wstr = _PyUnicode_WSTR(unicode); + wstr = _PyUnicode_WSTR(unicode); wstr = PyObject_REALLOC(wstr, new_size); if (!wstr) { PyErr_NoMemory(); @@ -1198,7 +1173,6 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) _PyUnicode_WSTR(unicode) = wstr; _PyUnicode_WSTR(unicode)[length] = 0; _PyUnicode_WSTR_LENGTH(unicode) = length; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_CheckConsistency(unicode, 0)); return 0; } @@ -1207,8 +1181,20 @@ static PyObject* resize_copy(PyObject *unicode, Py_ssize_t length) { Py_ssize_t copy_length; -#if HAVE_UNICODE_WCHAR_CACHE - if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) { + if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND) { + PyObject *copy; + + assert(PyUnicode_IS_READY(unicode)); + + copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); + if (copy == NULL) + return NULL; + + copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); + return copy; + } + else { PyObject *w; w = (PyObject*)_PyUnicode_New(length); @@ -1220,18 +1206,6 @@ resize_copy(PyObject *unicode, Py_ssize_t length) 
copy_length * sizeof(wchar_t)); return w; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - PyObject *copy; - - assert(PyUnicode_IS_READY(unicode)); - - copy = PyUnicode_New(length, PyUnicode_MAX_CHAR_VALUE(unicode)); - if (copy == NULL) - return NULL; - - copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); - return copy; } /* We allocate one more byte to make sure the string is @@ -1246,12 +1220,14 @@ resize_copy(PyObject *unicode, Py_ssize_t length) static PyUnicodeObject * _PyUnicode_New(Py_ssize_t length) { + PyUnicodeObject *unicode; + size_t new_size; + /* Optimization for empty strings */ if (length == 0) { return (PyUnicodeObject *)unicode_new_empty(); } -#if HAVE_UNICODE_WCHAR_CACHE /* Ensure we won't overflow the size. */ if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { return (PyUnicodeObject *)PyErr_NoMemory(); @@ -1262,10 +1238,10 @@ _PyUnicode_New(Py_ssize_t length) return NULL; } - PyUnicodeObject *unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); + unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) return NULL; - size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); _PyUnicode_WSTR_LENGTH(unicode) = length; _PyUnicode_HASH(unicode) = -1; @@ -1298,11 +1274,6 @@ _PyUnicode_New(Py_ssize_t length) assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0)); return unicode; -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_SetString(PyExc_SystemError, - "_PyUnicode_New() with non-zero size is not supported"); - return NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } static const char* @@ -1384,23 +1355,18 @@ _PyUnicode_Dump(PyObject *op) } else data = unicode->data.any; - printf("%s: len=%zu", unicode_kind_name(op), ascii->length); + printf("%s: len=%zu, ", unicode_kind_name(op), ascii->length); -#if HAVE_UNICODE_WCHAR_CACHE - printf(", "); if (ascii->wstr == data) 
printf("shared "); - printf("wstr=%p", ascii->wstr); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + printf("wstr=%p", (void *)ascii->wstr); if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) { -#if HAVE_UNICODE_WCHAR_CACHE - printf(" (%zu)", compact->wstr_length); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - printf(", "); - if (!ascii->state.compact && compact->utf8 == unicode->data.any) + printf(" (%zu), ", compact->wstr_length); + if (!ascii->state.compact && compact->utf8 == unicode->data.any) { printf("shared "); - printf("utf8=%p (%zu)", compact->utf8, compact->utf8_length); + } + printf("utf8=%p (%zu)", (void *)compact->utf8, compact->utf8_length); } printf(", data=%p\n", data); } @@ -1438,22 +1404,28 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) PyCompactUnicodeObject *unicode; void *data; enum PyUnicode_Kind kind; - int is_ascii; + int is_sharing, is_ascii; Py_ssize_t char_size; Py_ssize_t struct_size; is_ascii = 0; + is_sharing = 0; struct_size = sizeof(PyCompactUnicodeObject); if (maxchar < 128) { kind = PyUnicode_1BYTE_KIND; + char_size = 1; is_ascii = 1; struct_size = sizeof(PyASCIIObject); } else if (maxchar < 256) { kind = PyUnicode_1BYTE_KIND; + char_size = 1; } else if (maxchar < 65536) { kind = PyUnicode_2BYTE_KIND; + char_size = 2; + if (sizeof(wchar_t) == 2) + is_sharing = 1; } else { if (maxchar > MAX_UNICODE) { @@ -1462,11 +1434,10 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) return NULL; } kind = PyUnicode_4BYTE_KIND; + char_size = 4; + if (sizeof(wchar_t) == 4) + is_sharing = 1; } - char_size = kind; -#if HAVE_UNICODE_WCHAR_CACHE - int is_sharing = (sizeof(wchar_t) == kind); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the size. 
*/ if (size < 0) { @@ -1501,35 +1472,29 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) _PyUnicode_STATE(unicode).ascii = is_ascii; if (is_ascii) { ((char*)data)[size] = 0; -#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR(unicode) = NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + } + else if (kind == PyUnicode_1BYTE_KIND) { + ((char*)data)[size] = 0; + _PyUnicode_WSTR(unicode) = NULL; + _PyUnicode_WSTR_LENGTH(unicode) = 0; + unicode->utf8 = NULL; + unicode->utf8_length = 0; } else { unicode->utf8 = NULL; unicode->utf8_length = 0; - if (kind == PyUnicode_1BYTE_KIND) { - ((char*)data)[size] = 0; -#if HAVE_UNICODE_WCHAR_CACHE - _PyUnicode_WSTR(unicode) = NULL; - _PyUnicode_WSTR_LENGTH(unicode) = 0; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + if (kind == PyUnicode_2BYTE_KIND) + ((Py_UCS2*)data)[size] = 0; + else /* kind == PyUnicode_4BYTE_KIND */ + ((Py_UCS4*)data)[size] = 0; + if (is_sharing) { + _PyUnicode_WSTR_LENGTH(unicode) = size; + _PyUnicode_WSTR(unicode) = (wchar_t *)data; } else { - if (kind == PyUnicode_2BYTE_KIND) - ((Py_UCS2*)data)[size] = 0; - else /* kind == PyUnicode_4BYTE_KIND */ - ((Py_UCS4*)data)[size] = 0; -#if HAVE_UNICODE_WCHAR_CACHE - if (is_sharing) { - _PyUnicode_WSTR_LENGTH(unicode) = size; - _PyUnicode_WSTR(unicode) = (wchar_t *)data; - } - else { - _PyUnicode_WSTR_LENGTH(unicode) = 0; - _PyUnicode_WSTR(unicode) = NULL; - } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + _PyUnicode_WSTR_LENGTH(unicode) = 0; + _PyUnicode_WSTR(unicode) = NULL; } } #ifdef Py_DEBUG @@ -1844,14 +1809,12 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, int _PyUnicode_Ready(PyObject *unicode) { -#if HAVE_UNICODE_WCHAR_CACHE wchar_t *end; Py_UCS4 maxchar = 0; Py_ssize_t num_surrogates; #if SIZEOF_WCHAR_T == 2 Py_ssize_t length_wo_surrogates; #endif -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* _PyUnicode_Ready() is only intended for old-style API usage where strings were created using _PyObject_New() and where no canonical @@ -1859,15 +1822,12 @@ 
_PyUnicode_Ready(PyObject *unicode) which are not yet ready. */ assert(_PyUnicode_CHECK(unicode)); assert(_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND); -#if HAVE_UNICODE_WCHAR_CACHE assert(_PyUnicode_WSTR(unicode) != NULL); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ assert(_PyUnicode_DATA_ANY(unicode) == NULL); assert(_PyUnicode_UTF8(unicode) == NULL); /* Actually, it should neither be interned nor be anything else: */ assert(_PyUnicode_STATE(unicode).interned == SSTATE_NOT_INTERNED); -#if HAVE_UNICODE_WCHAR_CACHE end = _PyUnicode_WSTR(unicode) + _PyUnicode_WSTR_LENGTH(unicode); if (find_maxchar_surrogates(_PyUnicode_WSTR(unicode), end, &maxchar, &num_surrogates) == -1) @@ -1970,7 +1930,6 @@ _PyUnicode_Ready(PyObject *unicode) #endif PyUnicode_4BYTE_DATA(unicode)[_PyUnicode_LENGTH(unicode)] = '\0'; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_STATE(unicode).ready = 1; assert(_PyUnicode_CheckConsistency(unicode, 1)); return 0; @@ -2009,11 +1968,9 @@ unicode_dealloc(PyObject *unicode) Py_UNREACHABLE(); } -#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { PyObject_DEL(_PyUnicode_WSTR(unicode)); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_DEL(_PyUnicode_UTF8(unicode)); } @@ -2076,11 +2033,9 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length) assert(PyUnicode_Check(unicode)); assert(0 <= length); -#if HAVE_UNICODE_WCHAR_CACHE if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) old_length = PyUnicode_WSTR_LENGTH(unicode); else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ old_length = PyUnicode_GET_LENGTH(unicode); if (old_length == length) return 0; @@ -4225,7 +4180,6 @@ PyUnicode_AsUTF8(PyObject *unicode) Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) { -#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); return NULL; @@ -4255,10 +4209,6 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) if (size != NULL) *size = 
PyUnicode_WSTR_LENGTH(unicode); return w; -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_SetString(PyExc_SystemError, "PyUnicode_AsUnicodeAndSize is not supported"); - return NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /* Deprecated APIs */ @@ -4290,7 +4240,6 @@ _PyUnicode_AsUnicode(PyObject *unicode) Py_ssize_t PyUnicode_GetSize(PyObject *unicode) { -#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); goto onError; @@ -4303,10 +4252,6 @@ PyUnicode_GetSize(PyObject *unicode) onError: return -1; -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_SetString(PyExc_SystemError, "PyUnicode_GetSize is not supported"); - return -1; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } _Py_COMP_DIAG_POP @@ -11414,11 +11359,10 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) Py_ssize_t i; int kind; Py_UCS4 chr; + const unsigned char *ustr = (const unsigned char *)str; assert(_PyUnicode_CHECK(uni)); -#if HAVE_UNICODE_WCHAR_CACHE if (!PyUnicode_IS_READY(uni)) { - const unsigned char *ustr = (const unsigned char *)str; const wchar_t *ws = _PyUnicode_WSTR(uni); /* Compare Unicode string and source character set string */ for (i = 0; (chr = ws[i]) && ustr[i]; i++) { @@ -11433,7 +11377,6 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) return -1; /* str is longer */ return 0; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ kind = PyUnicode_KIND(uni); if (kind == PyUnicode_1BYTE_KIND) { const void *data = PyUnicode_1BYTE_DATA(uni); @@ -11471,7 +11414,6 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str) } } -#if HAVE_UNICODE_WCHAR_CACHE static int non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) { @@ -11489,7 +11431,6 @@ non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str) } return 1; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ int _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str) @@ -11502,13 +11443,11 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, 
const char *str) assert((unsigned char)*p < 128); } #endif -#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(unicode) == -1) { /* Memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(unicode, str); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(unicode)) return 0; len = (size_t)PyUnicode_GET_LENGTH(unicode); @@ -11529,13 +11468,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) } #endif -#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_READY(left) == -1) { /* memory error or bad data */ PyErr_Clear(); return non_ready_unicode_equal_to_ascii_string(left, right->string); } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (!PyUnicode_IS_ASCII(left)) return 0; @@ -11813,7 +11750,7 @@ PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right) } /* -Wraps asciilib_parse_args_finds() and additionally ensures that the +Wraps stringlib_parse_args_finds() and additionally ensures that the first argument is a unicode object. */ @@ -11822,7 +11759,7 @@ parse_args_finds_unicode(const char * function_name, PyObject *args, PyObject **substring, Py_ssize_t *start, Py_ssize_t *end) { - if(asciilib_parse_args_finds(function_name, args, substring, + if(stringlib_parse_args_finds(function_name, args, substring, start, end)) { if (ensure_unicode(*substring) < 0) return 0; @@ -12573,14 +12510,11 @@ _PyUnicode_ScanIdentifier(PyObject *self) int PyUnicode_IsIdentifier(PyObject *self) { -#if HAVE_UNICODE_WCHAR_CACHE if (PyUnicode_IS_READY(self)) { -#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_ssize_t i = _PyUnicode_ScanIdentifier(self); Py_ssize_t len = PyUnicode_GET_LENGTH(self); /* an empty string is not a valid identifier */ return len && i == len; -#if HAVE_UNICODE_WCHAR_CACHE } else { _Py_COMP_DIAG_PUSH @@ -12624,7 +12558,6 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS return 1; _Py_COMP_DIAG_POP } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ } /*[clinic input] @@ -13902,7 +13835,7 @@ unicode_startswith(PyObject *self, Py_ssize_t end = 
PY_SSIZE_T_MAX; int result; - if (!asciilib_parse_args_finds("startswith", args, &subobj, &start, &end)) + if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -13956,7 +13889,7 @@ unicode_endswith(PyObject *self, Py_ssize_t end = PY_SSIZE_T_MAX; int result; - if (!asciilib_parse_args_finds("endswith", args, &subobj, &start, &end)) + if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) return NULL; if (PyTuple_Check(subobj)) { Py_ssize_t i; @@ -14400,12 +14333,10 @@ unicode_sizeof_impl(PyObject *self) size += (PyUnicode_GET_LENGTH(self) + 1) * PyUnicode_KIND(self); } -#if HAVE_UNICODE_WCHAR_CACHE /* If the wstr pointer is present, account for it unless it is shared with the data pointer. Check if the data is not shared. */ if (_PyUnicode_HAS_WSTR_MEMORY(self)) size += (PyUnicode_WSTR_LENGTH(self) + 1) * sizeof(wchar_t); -#endif /* HAVE_UNICODE_WCHAR_CACHE */ if (_PyUnicode_HAS_UTF8_MEMORY(self)) size += PyUnicode_UTF8_LENGTH(self) + 1; @@ -15562,8 +15493,8 @@ static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *unicode, *self; - Py_ssize_t length; - int share_utf8; + Py_ssize_t length, char_size; + int share_wstr, share_utf8; unsigned int kind; void *data; @@ -15597,35 +15528,37 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; _PyUnicode_STATE(self).ready = 1; + _PyUnicode_WSTR(self) = NULL; _PyUnicode_UTF8_LENGTH(self) = 0; _PyUnicode_UTF8(self) = NULL; -#if HAVE_UNICODE_WCHAR_CACHE _PyUnicode_WSTR_LENGTH(self) = 0; - _PyUnicode_WSTR(self) = NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; -#if HAVE_UNICODE_WCHAR_CACHE - int share_wstr = 0; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + share_wstr = 0; if (kind == PyUnicode_1BYTE_KIND) { + char_size = 
1; if (PyUnicode_MAX_CHAR_VALUE(unicode) < 128) share_utf8 = 1; } -#if HAVE_UNICODE_WCHAR_CACHE + else if (kind == PyUnicode_2BYTE_KIND) { + char_size = 2; + if (sizeof(wchar_t) == 2) + share_wstr = 1; + } else { - if (sizeof(wchar_t) == kind) + assert(kind == PyUnicode_4BYTE_KIND); + char_size = 4; + if (sizeof(wchar_t) == 4) share_wstr = 1; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ /* Ensure we won't overflow the length. */ - if (length > (PY_SSIZE_T_MAX / kind - 1)) { + if (length > (PY_SSIZE_T_MAX / char_size - 1)) { PyErr_NoMemory(); goto onError; } - data = PyObject_MALLOC((length + 1) * kind); + data = PyObject_MALLOC((length + 1) * char_size); if (data == NULL) { PyErr_NoMemory(); goto onError; @@ -15636,12 +15569,10 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _PyUnicode_UTF8_LENGTH(self) = length; _PyUnicode_UTF8(self) = data; } -#if HAVE_UNICODE_WCHAR_CACHE if (share_wstr) { _PyUnicode_WSTR_LENGTH(self) = length; _PyUnicode_WSTR(self) = (wchar_t *)data; } -#endif /* HAVE_UNICODE_WCHAR_CACHE */ memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1)); @@ -15980,7 +15911,7 @@ unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), it->it_seq, it->it_index); } else { - PyObject *u = PyUnicode_New(0, 0); + PyObject *u = (PyObject *)_PyUnicode_New(0); if (u == NULL) return NULL; return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); @@ -16071,21 +16002,6 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } -Py_UNICODE* -PyUnicode_AsUnicodeCopy(PyObject *unicode) -{ - Py_ssize_t len; -#if USE_UNICODE_WCHAR_CACHE -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - if (PyUnicode_AsUnicode(unicode) == NULL) - return NULL; -_Py_COMP_DIAG_POP -#endif /* USE_UNICODE_WCHAR_CACHE */ - return PyUnicode_AsWideCharString(unicode, &len); -} - - static int encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) { diff --git 
a/Python/getargs.c b/Python/getargs.c index 534e209406e4dc..c85ff6d4777d2c 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1014,7 +1014,6 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { -#if HAVE_UNICODE_WCHAR_CACHE // TODO: Raise DeprecationWarning _Py_COMP_DIAG_PUSH _Py_COMP_DIAG_IGNORE_DEPR_DECLS @@ -1057,14 +1056,8 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS return converterr(c == 'Z' ? "str or None" : "str", arg, msgbuf, bufsize); } -_Py_COMP_DIAG_POP -#else /* HAVE_UNICODE_WCHAR_CACHE */ - PyErr_Format(PyExc_SystemError, - "unsupported PyArg_Parse format: \"%s\"", - format-1); - RETURN_ERR_OCCURRED; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ break; +_Py_COMP_DIAG_POP } case 'e': {/* encoded string */ diff --git a/Python/traceback.c b/Python/traceback.c index e183782400556e..99b63af11f8bee 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -684,9 +684,7 @@ _Py_DumpASCII(int fd, PyObject *text) int truncated; int kind; void *data = NULL; -#if HAVE_UNICODE_WCHAR_CACHE wchar_t *wstr = NULL; -#endif /* HAVE_UNICODE_WCHAR_CACHE */ Py_UCS4 ch; if (!PyUnicode_Check(text)) @@ -694,16 +692,13 @@ _Py_DumpASCII(int fd, PyObject *text) size = ascii->length; kind = ascii->state.kind; -#if HAVE_UNICODE_WCHAR_CACHE if (kind == PyUnicode_WCHAR_KIND) { wstr = ((PyASCIIObject *)text)->wstr; if (wstr == NULL) return; size = ((PyCompactUnicodeObject *)text)->wstr_length; } - else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ - if (ascii->state.compact) { + else if (ascii->state.compact) { if (ascii->state.ascii) data = ((PyASCIIObject*)text) + 1; else @@ -724,12 +719,10 @@ _Py_DumpASCII(int fd, PyObject *text) } for (i=0; i < size; i++) { -#if HAVE_UNICODE_WCHAR_CACHE - if (kind == PyUnicode_WCHAR_KIND) - ch = wstr[i]; - else -#endif /* HAVE_UNICODE_WCHAR_CACHE */ + if (kind != PyUnicode_WCHAR_KIND) ch = PyUnicode_READ(kind, data, i); + else + ch = 
wstr[i]; if (' ' <= ch && ch <= 126) { /* printable ASCII character */ char c = (char)ch; From 672b8d71dd40a3a2e3f6db1e7ba7a020b41c66fe Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 21:13:41 +0300 Subject: [PATCH 17/17] Fix possible leaks. --- PC/winreg.c | 3 +++ Python/fileutils.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/PC/winreg.c b/PC/winreg.c index 56e8c3a0aa5f74..a24d784c773c02 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -1731,6 +1731,9 @@ _Py_COMP_DIAG_POP if (PySys_Audit("winreg.SetValue", "nunu#", (Py_ssize_t)key, sub_key, (Py_ssize_t)type, value, value_length) < 0) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } diff --git a/Python/fileutils.c b/Python/fileutils.c index 9b60f6eca412e2..50ef3c174acc84 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1465,6 +1465,9 @@ _Py_fopen_obj(PyObject *path, const char *mode) wmode, Py_ARRAY_LENGTH(wmode)); if (usize == 0) { PyErr_SetFromWindowsErr(0); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; }