Skip to content

Commit 602f7cf

Browse files
committed
Issue #19424: Optimize PyUnicode_CompareWithASCIIString()
Use the fast memcmp() instead of a loop built on the slow PyUnicode_READ() macro. strlen() is still necessary to correctly handle Unicode strings that contain null characters.
1 parent ab457a2 commit 602f7cf

1 file changed

Lines changed: 30 additions & 13 deletions

File tree

Objects/unicodeobject.c

Lines changed: 30 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -10573,25 +10573,42 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
1057310573
{
1057410574
Py_ssize_t i;
1057510575
int kind;
10576-
void *data;
1057710576
Py_UCS4 chr;
1057810577

1057910578
assert(_PyUnicode_CHECK(uni));
1058010579
if (PyUnicode_READY(uni) == -1)
1058110580
return -1;
1058210581
kind = PyUnicode_KIND(uni);
10583-
data = PyUnicode_DATA(uni);
10584-
/* Compare Unicode string and source character set string */
10585-
for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
10586-
if (chr != str[i])
10587-
return (chr < (unsigned char)(str[i])) ? -1 : 1;
10588-
/* This check keeps Python strings that end in '\0' from comparing equal
10589-
to C strings identical up to that point. */
10590-
if (PyUnicode_GET_LENGTH(uni) != i || chr)
10591-
return 1; /* uni is longer */
10592-
if (str[i])
10593-
return -1; /* str is longer */
10594-
return 0;
10582+
if (kind == PyUnicode_1BYTE_KIND) {
10583+
char *data = PyUnicode_1BYTE_DATA(uni);
10584+
Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni);
10585+
size_t len, len2 = strlen(str);
10586+
int cmp;
10587+
10588+
len = Py_MIN(len1, len2);
10589+
cmp = memcmp(data, str, len);
10590+
if (cmp != 0)
10591+
return cmp;
10592+
if (len1 > len2)
10593+
return 1; /* uni is longer */
10594+
if (len2 > len1)
10595+
return -1; /* str is longer */
10596+
return 0;
10597+
}
10598+
else {
10599+
void *data = PyUnicode_DATA(uni);
10600+
/* Compare Unicode string and source character set string */
10601+
for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
10602+
if (chr != str[i])
10603+
return (chr < (unsigned char)(str[i])) ? -1 : 1;
10604+
/* This check keeps Python strings that end in '\0' from comparing equal
10605+
to C strings identical up to that point. */
10606+
if (PyUnicode_GET_LENGTH(uni) != i || chr)
10607+
return 1; /* uni is longer */
10608+
if (str[i])
10609+
return -1; /* str is longer */
10610+
return 0;
10611+
}
1059510612
}
1059610613

1059710614

0 commit comments

Comments
 (0)