Skip to content

Commit 2c4928d

Browse files
methane, aeros, vstinner
authored
bpo-36346: Add Py_DEPRECATED to deprecated unicode APIs (pythonGH-20878)
Co-authored-by: Kyle Stanley <[email protected]> Co-authored-by: Victor Stinner <[email protected]>
1 parent 818f5b5 commit 2c4928d

File tree

6 files changed

+75
-23
lines changed

6 files changed

+75
-23
lines changed

Doc/whatsnew/3.9.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,12 @@ Porting to Python 3.9
10971097
internal C API (``pycore_gc.h``).
10981098
(Contributed by Victor Stinner in :issue:`40241`.)
10991099

1100+
* The ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
1101+
:c:func:`PyUnicode_FromUnicode`, :c:func:`PyUnicode_AsUnicode`,
1102+
``_PyUnicode_AsUnicode``, and :c:func:`PyUnicode_AsUnicodeAndSize` are
1103+
marked as deprecated in C. They have been deprecated by :pep:`393` since
1104+
Python 3.3.
1105+
(Contributed by Inada Naoki in :issue:`36346`.)
11001106

11011107
Removed
11021108
-------
@@ -1165,3 +1171,8 @@ Removed
11651171

11661172
* Remove ``_PyUnicode_ClearStaticStrings()`` function.
11671173
(Contributed by Victor Stinner in :issue:`39465`.)
1174+
1175+
* Remove ``Py_UNICODE_MATCH``. It has been deprecated by :pep:`393`, and
1176+
broken since Python 3.3. The :c:func:`PyUnicode_Tailmatch` function can be
1177+
used instead.
1178+
(Contributed by Inada Naoki in :issue:`36346`.)

Include/cpython/unicodeobject.h

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,17 @@
4646
Py_UNICODE_ISDIGIT(ch) || \
4747
Py_UNICODE_ISNUMERIC(ch))
4848

49-
#define Py_UNICODE_COPY(target, source, length) \
50-
memcpy((target), (source), (length)*sizeof(Py_UNICODE))
51-
52-
#define Py_UNICODE_FILL(target, value, length) \
53-
do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
54-
for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
55-
} while (0)
49+
Py_DEPRECATED(3.3) static inline void
50+
Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
51+
memcpy(target, source, length * sizeof(Py_UNICODE));
52+
}
53+
54+
Py_DEPRECATED(3.3) static inline void
55+
Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
56+
for (Py_ssize_t i = 0; i < length; i++) {
57+
target[i] = value;
58+
}
59+
}
5660

5761
/* macros to work with surrogates */
5862
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
@@ -67,14 +71,6 @@
6771
/* low surrogate = bottom 10 bits added to DC00 */
6872
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
6973

70-
/* Check if substring matches at given offset. The offset must be
71-
valid, and the substring must not be empty. */
72-
73-
#define Py_UNICODE_MATCH(string, offset, substring) \
74-
((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
75-
((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
76-
!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
77-
7874
/* --- Unicode Type ------------------------------------------------------- */
7975

8076
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
@@ -247,10 +243,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
247243
int check_content);
248244

249245
/* Fast access macros */
250-
#define PyUnicode_WSTR_LENGTH(op) \
251-
(PyUnicode_IS_COMPACT_ASCII(op) ? \
252-
((PyASCIIObject*)op)->length : \
253-
((PyCompactUnicodeObject*)op)->wstr_length)
254246

255247
/* Returns the deprecated Py_UNICODE representation's size in code units
256248
(this includes surrogate pairs as 2 units).
@@ -445,6 +437,14 @@ enum PyUnicode_Kind {
445437
(0xffffU) : \
446438
(0x10ffffU)))))
447439

440+
Py_DEPRECATED(3.3)
441+
static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
442+
return PyUnicode_IS_COMPACT_ASCII(op) ?
443+
((PyASCIIObject*)op)->length :
444+
((PyCompactUnicodeObject*)op)->wstr_length;
445+
}
446+
/* Accepts any object pointer type. The argument is parenthesized before the
   cast so that expression arguments (e.g. pointer arithmetic) are converted
   as a whole rather than having the cast bind to their first operand. */
#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)(op))
447+
448448
/* === Public API ========================================================= */
449449

450450
/* --- Plain Py_UNICODE --------------------------------------------------- */
@@ -543,7 +543,7 @@ PyAPI_FUNC(void) _PyUnicode_FastFill(
543543
only allowed if u was set to NULL.
544544
545545
The buffer is copied into the new object. */
546-
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
546+
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
547547
const Py_UNICODE *u, /* Unicode buffer */
548548
Py_ssize_t size /* size of buffer */
549549
);
@@ -572,13 +572,13 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
572572
Py_UNICODE buffer.
573573
If the wchar_t/Py_UNICODE representation is not yet available, this
574574
function will calculate it. */
575-
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
575+
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
576576
PyObject *unicode /* Unicode object */
577577
);
578578

579579
/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
580580
contains null characters. */
581-
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
581+
Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
582582
PyObject *unicode /* Unicode object */
583583
);
584584

@@ -587,7 +587,7 @@ PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
587587
If the wchar_t/Py_UNICODE representation is not yet available, this
588588
function will calculate it. */
589589

590-
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
590+
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
591591
PyObject *unicode, /* Unicode object */
592592
Py_ssize_t *size /* location where to save the length */
593593
);
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Mark ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
2+
``PyUnicode_FromUnicode``, ``PyUnicode_AsUnicode``, ``_PyUnicode_AsUnicode``,
3+
and ``PyUnicode_AsUnicodeAndSize`` as deprecated in C. Remove ``Py_UNICODE_MATCH``
4+
which was deprecated and broken since Python 3.3.

Modules/_testcapimodule.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,6 +1668,10 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)
16681668

16691669
static volatile int x;
16701670

1671+
/* Ignore use of deprecated APIs */
1672+
_Py_COMP_DIAG_PUSH
1673+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1674+
16711675
/* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
16721676
of an error.
16731677
*/
@@ -1844,6 +1848,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
18441848

18451849
Py_RETURN_NONE;
18461850
}
1851+
_Py_COMP_DIAG_POP
18471852

18481853
static PyObject *
18491854
unicode_aswidechar(PyObject *self, PyObject *args)
@@ -2064,6 +2069,10 @@ unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
20642069
return PyUnicode_TransformDecimalToASCII(unicode, length);
20652070
}
20662071

2072+
/* Ignore use of deprecated APIs */
2073+
_Py_COMP_DIAG_PUSH
2074+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
2075+
20672076
static PyObject *
20682077
unicode_legacy_string(PyObject *self, PyObject *args)
20692078
{
@@ -2086,6 +2095,7 @@ unicode_legacy_string(PyObject *self, PyObject *args)
20862095

20872096
return u;
20882097
}
2098+
_Py_COMP_DIAG_POP
20892099

20902100
static PyObject *
20912101
getargs_w_star(PyObject *self, PyObject *args)

Objects/unicodeobject.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ extern "C" {
120120
_PyUnicode_UTF8_LENGTH(op))
121121
#define _PyUnicode_WSTR(op) \
122122
(((PyASCIIObject*)(op))->wstr)
123+
124+
/* Don't use the deprecated PyUnicode_WSTR_LENGTH macro from unicodeobject.h;
   redefine it locally instead. */
125+
#undef PyUnicode_WSTR_LENGTH
126+
#define PyUnicode_WSTR_LENGTH(op) \
127+
(PyUnicode_IS_COMPACT_ASCII(op) ? \
128+
((PyASCIIObject*)op)->length : \
129+
((PyCompactUnicodeObject*)op)->wstr_length)
123130
#define _PyUnicode_WSTR_LENGTH(op) \
124131
(((PyCompactUnicodeObject*)(op))->wstr_length)
125132
#define _PyUnicode_LENGTH(op) \
@@ -970,11 +977,14 @@ ensure_unicode(PyObject *obj)
970977
#include "stringlib/find_max_char.h"
971978
#include "stringlib/undef.h"
972979

980+
_Py_COMP_DIAG_PUSH
981+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
973982
#include "stringlib/unicodedefs.h"
974983
#include "stringlib/fastsearch.h"
975984
#include "stringlib/count.h"
976985
#include "stringlib/find.h"
977986
#include "stringlib/undef.h"
987+
_Py_COMP_DIAG_POP
978988

979989
/* --- Unicode Object ----------------------------------------------------- */
980990

@@ -4097,6 +4107,11 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
40974107
return w;
40984108
}
40994109

4110+
/* Deprecated APIs */
4111+
4112+
_Py_COMP_DIAG_PUSH
4113+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
4114+
41004115
Py_UNICODE *
41014116
PyUnicode_AsUnicode(PyObject *unicode)
41024117
{
@@ -4135,6 +4150,8 @@ PyUnicode_GetSize(PyObject *unicode)
41354150
return -1;
41364151
}
41374152

4153+
_Py_COMP_DIAG_POP
4154+
41384155
Py_ssize_t
41394156
PyUnicode_GetLength(PyObject *unicode)
41404157
{
@@ -12364,6 +12381,8 @@ PyUnicode_IsIdentifier(PyObject *self)
1236412381
return len && i == len;
1236512382
}
1236612383
else {
12384+
_Py_COMP_DIAG_PUSH
12385+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1236712386
Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
1236812387
if (len == 0) {
1236912388
/* an empty string is not a valid identifier */
@@ -12401,6 +12420,7 @@ PyUnicode_IsIdentifier(PyObject *self)
1240112420
}
1240212421
}
1240312422
return 1;
12423+
_Py_COMP_DIAG_POP
1240412424
}
1240512425
}
1240612426

@@ -15955,7 +15975,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
1595515975
PyErr_BadArgument();
1595615976
return NULL;
1595715977
}
15978+
_Py_COMP_DIAG_PUSH
15979+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1595815980
u = PyUnicode_AsUnicodeAndSize(unicode, &len);
15981+
_Py_COMP_DIAG_POP
1595915982
if (u == NULL)
1596015983
return NULL;
1596115984
/* Ensure we won't overflow the size. */

Python/getargs.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
10271027
case 'u': /* raw unicode buffer (Py_UNICODE *) */
10281028
case 'Z': /* raw unicode buffer or None */
10291029
{
1030+
// TODO: Raise DeprecationWarning
1031+
_Py_COMP_DIAG_PUSH
1032+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
10301033
Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **);
10311034

10321035
if (*format == '#') {
@@ -1066,6 +1069,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
10661069
arg, msgbuf, bufsize);
10671070
}
10681071
break;
1072+
_Py_COMP_DIAG_POP
10691073
}
10701074

10711075
case 'e': {/* encoded string */

0 commit comments

Comments
 (0)