gh-97982: Remove asciilib_count() #98164
gh-97982: Remove asciilib_count() #98164 — vstinner merged 1 commit into python:main from vstinner:remove_asciilib_count
Conversation
|
I prefer to remove it to avoid people making the assumption that it's faster just because it exists: #98025 (comment)
|
|
Rationale for the removal: asciilib_count() is the same code as ucs1lib_count() and so has the same performance. See my benchmark: #98025 (comment) |
Objects/stringlib/count.h
Outdated
| // gh-97982: Implementing asciilib_count() is not worth it, FASTSEARCH() does | ||
| // not specialize the code for ASCII strings. Use ucs1lib_count() for ASCII and | ||
| // UCS1 strings: it's the same than asciilib_count(). | ||
| #if STRINGLIB_MAX_CHAR > 0x7Fu |
There was a problem hiding this comment.
Maybe !STRINGLIB_IS_UNICODE || STRINGLIB_MAX_CHAR > 0x7Fu?
STRINGLIB_MAX_CHAR was only used in codecs.h when STRINGLIB_IS_UNICODE is true.
There was a problem hiding this comment.
Oh thanks, that makes my PR shorter!
asciilib_count() is the same as ucs1lib_count(): the code is not specialized for ASCII strings, so it's not worth having a separate function. Remove the asciilib_count() function.
|
I compared the preprocessor output for the ASCII and UCS1 variants. The only differences are the function names; the code is the same.
--- ASCII 2022-10-11 17:58:22.182135206 +0200
+++ UCS1 2022-10-11 17:58:25.069159913 +0200
@@ -1,5 +1,5 @@
static inline Py_ssize_t
-asciilib_count(const Py_UCS1* str, Py_ssize_t str_len,
+ucs1lib_count(const Py_UCS1* str, Py_ssize_t str_len,
const Py_UCS1* sub, Py_ssize_t sub_len,
Py_ssize_t maxcount)
{
@@ -8,13 +8,13 @@
return 0;
if (sub_len == 0)
return (str_len < maxcount) ? str_len + 1 : maxcount;
- count = asciilib_fastsearch(str, str_len, sub, sub_len, maxcount, 0);
+ count = ucs1lib_fastsearch(str, str_len, sub, sub_len, maxcount, 0);
if (count < 0)
return 0;
return count;
}
static inline Py_ssize_t
-asciilib_find_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
+ucs1lib_find_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
{
const Py_UCS1 *p, *e;
p = s;
@@ -35,7 +35,7 @@
return -1;
}
static inline Py_ssize_t
-asciilib_rfind_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
+ucs1lib_rfind_char(const Py_UCS1* s, Py_ssize_t n, Py_UCS1 ch)
{
const Py_UCS1 *p;
if (n > 15) {
@@ -55,7 +55,7 @@
return -1;
}
static inline Py_ssize_t
-asciilib__lex_search(const Py_UCS1 *needle, Py_ssize_t len_needle,
+ucs1lib__lex_search(const Py_UCS1 *needle, Py_ssize_t len_needle,
Py_ssize_t *return_period, int invert_alphabet)
{
Py_ssize_t max_suffix = 0;
@@ -90,13 +90,13 @@
return max_suffix;
}
static inline Py_ssize_t
-asciilib__factorize(const Py_UCS1 *needle,
+ucs1lib__factorize(const Py_UCS1 *needle,
Py_ssize_t len_needle,
Py_ssize_t *return_period)
{
Py_ssize_t cut1, period1, cut2, period2, cut, period;
- cut1 = asciilib__lex_search(needle, len_needle, &period1, 0);
- cut2 = asciilib__lex_search(needle, len_needle, &period2, 1);
+ cut1 = ucs1lib__lex_search(needle, len_needle, &period1, 0);
+ cut2 = ucs1lib__lex_search(needle, len_needle, &period2, 1);
if (cut1 > cut2) {
period = period1;
cut = cut1;
@@ -111,7 +111,7 @@
*return_period = period;
return cut;
}
-typedef struct asciilib__pre {
+typedef struct ucs1lib__pre {
const Py_UCS1 *needle;
Py_ssize_t len_needle;
Py_ssize_t cut;
@@ -119,14 +119,14 @@
Py_ssize_t gap;
int is_periodic;
uint8_t table[(1U << 6u)];
-} asciilib_prework;
+} ucs1lib_prework;
static void
-asciilib__preprocess(const Py_UCS1 *needle, Py_ssize_t len_needle,
- asciilib_prework *p)
+ucs1lib__preprocess(const Py_UCS1 *needle, Py_ssize_t len_needle,
+ ucs1lib_prework *p)
{
p->needle = needle;
p->len_needle = len_needle;
- p->cut = asciilib__factorize(needle, len_needle, &(p->period));
+ p->cut = ucs1lib__factorize(needle, len_needle, &(p->period));
((
p->period + p->cut <= len_needle
) ? (void) (0) : __assert_fail (
@@ -191,8 +191,8 @@
}
}
static Py_ssize_t
-asciilib__two_way(const Py_UCS1 *haystack, Py_ssize_t len_haystack,
- asciilib_prework *p)
+ucs1lib__two_way(const Py_UCS1 *haystack, Py_ssize_t len_haystack,
+ ucs1lib_prework *p)
{
const Py_ssize_t len_needle = p->len_needle;
const Py_ssize_t cut = p->cut;
@@ -333,30 +333,30 @@
return -1;
}
static Py_ssize_t
-asciilib__two_way_find(const Py_UCS1 *haystack,
+ucs1lib__two_way_find(const Py_UCS1 *haystack,
Py_ssize_t len_haystack,
const Py_UCS1 *needle,
Py_ssize_t len_needle)
{
;
- asciilib_prework p;
- asciilib__preprocess(needle, len_needle, &p);
- return asciilib__two_way(haystack, len_haystack, &p);
+ ucs1lib_prework p;
+ ucs1lib__preprocess(needle, len_needle, &p);
+ return ucs1lib__two_way(haystack, len_haystack, &p);
}
static Py_ssize_t
-asciilib__two_way_count(const Py_UCS1 *haystack,
+ucs1lib__two_way_count(const Py_UCS1 *haystack,
Py_ssize_t len_haystack,
const Py_UCS1 *needle,
Py_ssize_t len_needle,
Py_ssize_t maxcount)
{
;
- asciilib_prework p;
- asciilib__preprocess(needle, len_needle, &p);
+ ucs1lib_prework p;
+ ucs1lib__preprocess(needle, len_needle, &p);
Py_ssize_t index = 0, count = 0;
while (1) {
Py_ssize_t result;
- result = asciilib__two_way(haystack + index,
+ result = ucs1lib__two_way(haystack + index,
len_haystack - index, &p);
if (result == -1) {
return count;
@@ -370,7 +370,7 @@
return count;
}
static inline Py_ssize_t
-asciilib_default_find(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_default_find(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -422,7 +422,7 @@
return mode == 0 ? count : -1;
}
static Py_ssize_t
-asciilib_adaptive_find(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_adaptive_find(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -462,11 +462,11 @@
hits += j + 1;
if (hits > m / 4 && w - i > 2000) {
if (mode == 1) {
- res = asciilib__two_way_find(s + i, n - i, p, m);
+ res = ucs1lib__two_way_find(s + i, n - i, p, m);
return res == -1 ? -1 : res + i;
}
else {
- res = asciilib__two_way_count(s + i, n - i, p, m,
+ res = ucs1lib__two_way_count(s + i, n - i, p, m,
maxcount - count);
return res + count;
}
@@ -487,7 +487,7 @@
return mode == 0 ? count : -1;
}
static Py_ssize_t
-asciilib_default_rfind(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_default_rfind(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -526,7 +526,7 @@
return -1;
}
static inline Py_ssize_t
-asciilib_count_char(const Py_UCS1 *s, Py_ssize_t n,
+ucs1lib_count_char(const Py_UCS1 *s, Py_ssize_t n,
const Py_UCS1 p0, Py_ssize_t maxcount)
{
Py_ssize_t i, count = 0;
@@ -541,7 +541,7 @@
return count;
}
static inline Py_ssize_t
-asciilib_fastsearch(const Py_UCS1* s, Py_ssize_t n,
+ucs1lib_fastsearch(const Py_UCS1* s, Py_ssize_t n,
const Py_UCS1* p, Py_ssize_t m,
Py_ssize_t maxcount, int mode)
{
@@ -553,30 +553,30 @@
return -1;
}
if (mode == 1)
- return asciilib_find_char(s, n, p[0]);
+ return ucs1lib_find_char(s, n, p[0]);
else if (mode == 2)
- return asciilib_rfind_char(s, n, p[0]);
+ return ucs1lib_rfind_char(s, n, p[0]);
else {
- return asciilib_count_char(s, n, p[0], maxcount);
+ return ucs1lib_count_char(s, n, p[0], maxcount);
}
}
if (mode != 2) {
if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
- return asciilib_default_find(s, n, p, m, maxcount, mode);
+ return ucs1lib_default_find(s, n, p, m, maxcount, mode);
}
else if ((m >> 2) * 3 < (n >> 2)) {
if (mode == 1) {
- return asciilib__two_way_find(s, n, p, m);
+ return ucs1lib__two_way_find(s, n, p, m);
}
else {
- return asciilib__two_way_count(s, n, p, m, maxcount);
+ return ucs1lib__two_way_count(s, n, p, m, maxcount);
}
}
else {
- return asciilib_adaptive_find(s, n, p, m, maxcount, mode);
+ return ucs1lib_adaptive_find(s, n, p, m, maxcount, mode);
}
}
else {
- return asciilib_default_rfind(s, n, p, m, maxcount, mode);
+ return ucs1lib_default_rfind(s, n, p, m, maxcount, mode);
}
} |
* main: (38 commits) pythongh-98251: Allow venv to pass along PYTHON* variables to pip and ensurepip when they do not impact path resolution (pythonGH-98259) Bpo-41246: IOCP Proactor avoid callback code duplication (python#21399) bpo-46364: Use sockets for stdin of asyncio only on AIX (python#30596) pythongh-98178: syslog() is not thread-safe on macOS (python#98213) Mark all targets in `Doc/Makefile` as `PHONY` (pythonGH-98189) pythongh-97982: Factorize PyUnicode_Count() and unicode_count() code (python#98025) pythongh-96265: Formatting changes for faq/general (python#98129) tutorial: remove "with single quotes" (python#98204) pythongh-97669: Remove Tools/scripts/startuptime.py (python#98214) signalmodule.c uses _PyErr_WriteUnraisableMsg() (python#98217) pythongh-97669: Fix test_tools reference leak (python#98216) pythongh-97669: Create Tools/patchcheck/ directory (python#98186) pythongh-65046: Link to logging cookbook from asyncio docs (python#98207) Formatting fixes in contextlib docs (python#98111) pythongh-95276: Add callable entry to the glossary (python#95738) pythongh-96130: Rephrase use of "typecheck" verb for clarity (python#98144) Fix some incorrect indentation around the main switch (python#98177) pythongh-98172: Fix formatting in `except*` docs (python#98173) pythongh-97982: Remove asciilib_count() (python#98164) pythongh-95756: Free and NULL-out code caches when needed (pythonGH-98181) ...
* main: (37 commits) pythongh-98251: Allow venv to pass along PYTHON* variables to pip and ensurepip when they do not impact path resolution (pythonGH-98259) Bpo-41246: IOCP Proactor avoid callback code duplication (python#21399) bpo-46364: Use sockets for stdin of asyncio only on AIX (python#30596) pythongh-98178: syslog() is not thread-safe on macOS (python#98213) Mark all targets in `Doc/Makefile` as `PHONY` (pythonGH-98189) pythongh-97982: Factorize PyUnicode_Count() and unicode_count() code (python#98025) pythongh-96265: Formatting changes for faq/general (python#98129) tutorial: remove "with single quotes" (python#98204) pythongh-97669: Remove Tools/scripts/startuptime.py (python#98214) signalmodule.c uses _PyErr_WriteUnraisableMsg() (python#98217) pythongh-97669: Fix test_tools reference leak (python#98216) pythongh-97669: Create Tools/patchcheck/ directory (python#98186) pythongh-65046: Link to logging cookbook from asyncio docs (python#98207) Formatting fixes in contextlib docs (python#98111) pythongh-95276: Add callable entry to the glossary (python#95738) pythongh-96130: Rephrase use of "typecheck" verb for clarity (python#98144) Fix some incorrect indentation around the main switch (python#98177) pythongh-98172: Fix formatting in `except*` docs (python#98173) pythongh-97982: Remove asciilib_count() (python#98164) pythongh-95756: Free and NULL-out code caches when needed (pythonGH-98181) ...
asciilib_count() is the same as ucs1lib_count(): the code is not specialized for ASCII strings, so it's not worth having a separate function. Remove the asciilib_count() function.
Define STRINGLIB_MAX_CHAR in C files of bytes strings.
PyUnicode_Count and unicode_count #97982