Skip to content

Commit 5f8c3fb

Browse files
authored
gh-91924: Optimize unicode_check_encoding_errors() (#93200)
Skip the _PyCodec_Lookup() and PyCodec_LookupError() calls for the most common built-in encodings and error handlers: these are known to be valid, so there is no need to create a temporary Unicode string object just to look them up.
1 parent efc5d37 commit 5f8c3fb

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

Objects/unicodeobject.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -454,15 +454,29 @@ unicode_check_encoding_errors(const char *encoding, const char *errors)
454454
return 0;
455455
}
456456

457-
if (encoding != NULL) {
457+
if (encoding != NULL
458+
// Fast path for the most common built-in encodings. Even if the codec
459+
// is cached, _PyCodec_Lookup() decodes the bytes string from UTF-8 to
460+
// create a temporary Unicode string (the key in the cache).
461+
&& strcmp(encoding, "utf-8") != 0
462+
&& strcmp(encoding, "utf8") != 0
463+
&& strcmp(encoding, "ascii") != 0)
464+
{
458465
PyObject *handler = _PyCodec_Lookup(encoding);
459466
if (handler == NULL) {
460467
return -1;
461468
}
462469
Py_DECREF(handler);
463470
}
464471

465-
if (errors != NULL) {
472+
if (errors != NULL
473+
// Fast path for the most common built-in error handlers.
474+
&& strcmp(errors, "strict") != 0
475+
&& strcmp(errors, "ignore") != 0
476+
&& strcmp(errors, "replace") != 0
477+
&& strcmp(errors, "surrogateescape") != 0
478+
&& strcmp(errors, "surrogatepass") != 0)
479+
{
466480
PyObject *handler = PyCodec_LookupError(errors);
467481
if (handler == NULL) {
468482
return -1;

0 commit comments

Comments
 (0)