Skip to content

Add ability to emulate thread-safe locale operations #21971

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: blead
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -3825,6 +3825,16 @@ p |void |dump_sv_child |NN SV *sv
CRTip |unsigned int|variant_byte_number \
|PERL_UINTMAX_T word
#endif
: These three entries exist only when EMULATE_THREAD_SAFE_LOCALES is defined.
: category_lock and category_unlock take identical arguments: a UV mask, a
: file name that must not be NULL, and the caller's line number.
: posix_LC_foo_ takes a character and a character class number, and returns
: an int.
#if defined(EMULATE_THREAD_SAFE_LOCALES)
Cp |void |category_lock |const UV mask \
|NN const char *file \
|const line_t caller_line
Cp |void |category_unlock|const UV mask \
|NN const char *file \
|const line_t caller_line
Cip |int |posix_LC_foo_ |const int c \
|const U8 classnum
#endif
#if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
ARdp |I32 |my_chsize |int fd \
|Off_t length
Expand Down Expand Up @@ -4517,6 +4527,13 @@ RS |char * |my_setlocale_debug_string_i \
|NULLOK const char *retval \
|const line_t line
# endif
: update_PL_curlocales_i is declared in two configurations: when
: EMULATE_THREAD_SAFE_LOCALES is defined, or when USE_POSIX_2008_LOCALE is
: defined without USE_QUERYLOCALE.
# if defined(EMULATE_THREAD_SAFE_LOCALES) || \
( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) )
S |void |update_PL_curlocales_i \
|const locale_category_index index \
|NN const char *new_locale \
|const line_t caller_line
# endif
# if defined(HAS_LOCALECONV) && \
( defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC) )
S |void |populate_hash_from_localeconv \
Expand Down Expand Up @@ -4577,14 +4594,8 @@ S |const char *|querylocale_2008_i \
|const locale_category_index index \
|const line_t line
S |locale_t|use_curlocale_scratch
# if !defined(USE_QUERYLOCALE)
S |void |update_PL_curlocales_i \
|const locale_category_index index \
|NN const char *new_locale \
|const line_t caller_line
# endif
# elif defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) && \
!defined(USE_THREAD_SAFE_LOCALE_EMULATION)
# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \
defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE)
S |bool |less_dicey_bool_setlocale_r \
|const int cat \
|NN const char *locale
Expand Down
17 changes: 11 additions & 6 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,11 @@
# if !defined(EBCDIC)
# define variant_byte_number Perl_variant_byte_number
# endif
# if defined(EMULATE_THREAD_SAFE_LOCALES)
/* Short names for the helpers that exist only under
 * EMULATE_THREAD_SAFE_LOCALES.  Each one forwards to the Perl_-prefixed
 * function and supplies the aTHX_ argument itself. */
# define category_lock(a,b,c) Perl_category_lock(aTHX_ a,b,c)
# define category_unlock(a,b,c) Perl_category_unlock(aTHX_ a,b,c)
# define posix_LC_foo_(a,b) Perl_posix_LC_foo_(aTHX_ a,b)
# endif
# if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
# define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b)
# endif
Expand Down Expand Up @@ -1343,6 +1348,10 @@
# if defined(DEBUGGING)
# define my_setlocale_debug_string_i(a,b,c,d) S_my_setlocale_debug_string_i(aTHX_ a,b,c,d)
# endif
/* Short name for S_update_PL_curlocales_i(), supplying aTHX_.  Defined when
 * EMULATE_THREAD_SAFE_LOCALES is in effect, or when USE_POSIX_2008_LOCALE is
 * defined without USE_QUERYLOCALE. */
# if defined(EMULATE_THREAD_SAFE_LOCALES) || \
( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) )
# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c)
# endif
# if defined(HAS_LOCALECONV) && \
( defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC) )
# define populate_hash_from_localeconv(a,b,c,d,e) S_populate_hash_from_localeconv(aTHX_ a,b,c,d,e)
Expand Down Expand Up @@ -1375,12 +1384,8 @@
# define bool_setlocale_2008_i(a,b,c) S_bool_setlocale_2008_i(aTHX_ a,b,c)
# define querylocale_2008_i(a,b) S_querylocale_2008_i(aTHX_ a,b)
# define use_curlocale_scratch() S_use_curlocale_scratch(aTHX)
# if !defined(USE_QUERYLOCALE)
# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c)
# endif
# elif defined(USE_LOCALE_THREADS) && \
!defined(USE_THREAD_SAFE_LOCALE) && \
!defined(USE_THREAD_SAFE_LOCALE_EMULATION)
# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \
defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE)
# define less_dicey_bool_setlocale_r(a,b) S_less_dicey_bool_setlocale_r(aTHX_ a,b)
# define less_dicey_setlocale_r(a,b) S_less_dicey_setlocale_r(aTHX_ a,b)
# endif
Expand Down
4 changes: 4 additions & 0 deletions embedvar.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 17 additions & 4 deletions handy.h
Original file line number Diff line number Diff line change
Expand Up @@ -1555,9 +1555,16 @@ or casts

# define HIGHEST_REGCOMP_DOT_H_SYNC_ CC_VERTSPACE_

/* The members of the third group below do not need to be coordinated with data
* structures in regcomp.[ch] and regexec.c. */
# define CC_IDFIRST_ 16
/* These three follow immediately after the final function that has a version
 * defined by C, like isascii().  They are used in the 'PL_libc_char_fcns' data
 * structure, along with the ones above them.
 *
 * CC_TOLOWER_ and CC_TOUPPER_ evaluate to 17 and 18, which are the same values
 * as the first two members of the next group below (CC_CHARNAME_CONT_ and
 * CC_NONLATIN1_FOLD_).
 * NOTE(review): presumably this overlap is deliberate, on the basis that the
 * two sets are never used as indices or bit positions in the same table --
 * confirm before relying on it. */
# define CC_IDFIRST_ 16
# define CC_TOLOWER_ (CC_IDFIRST_ + 1)
# define CC_TOUPPER_ (CC_TOLOWER_ + 1)

/* The members of the fourth group below do not need to be coordinated with
* data structures in regcomp.[ch] and regexec.c. */
# define CC_CHARNAME_CONT_ 17
# define CC_NONLATIN1_FOLD_ 18
# define CC_NONLATIN1_SIMPLE_FOLD_ 19
Expand Down Expand Up @@ -2025,7 +2032,7 @@ END_EXTERN_C
# define is_posix_XDIGIT(c) isxdigit((U8) (c))
#endif

/* Below is the next level up, which currently expands to nothing more
/* Below is the next level up, which on most platforms expands to nothing more
* than the previous layer. These are the macros to use if you really need
* something whose input domain is a byte, and the locale isn't UTF-8; that is,
* where you normally would have to use things like bare isalnum().
Expand All @@ -2037,7 +2044,13 @@ END_EXTERN_C
* (Note, proper general operation of the bare libc functions requires you to
* cast to U8. These do that for you automatically.) */

/* WRAP_U8_LC_(c, classnum, posix)
 *
 * Ordinarily this is nothing more than a call of 'posix', the macro from the
 * layer below, on 'c'; 'classnum' is then unused.  The exception is when
 * EMULATE_THREAD_SAFE_LOCALES is defined: the work is then handed to
 * posix_LC_foo_(), which is passed the character and the class number, and
 * 'posix' is unused. */
#ifndef EMULATE_THREAD_SAFE_LOCALES
# define WRAP_U8_LC_(c, classnum, posix) posix(c)
#else
# define WRAP_U8_LC_(c, classnum, posix) posix_LC_foo_((c), (classnum))
#endif

/* Alphanumeric test for a byte 'c'.  Goes through WRAP_U8_LC_, giving it the
 * CC_ALPHANUMERIC_ class number and is_posix_ALPHANUMERIC as the macro from
 * the layer below. */
#define isU8_ALPHANUMERIC_LC(c) \
WRAP_U8_LC_((c), CC_ALPHANUMERIC_, is_posix_ALPHANUMERIC)
Expand Down
43 changes: 43 additions & 0 deletions inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,49 @@ S_PerlEnv_putenv(pTHX_ char * str)

#endif

/* ------------------------------- handy.h ------------------------------- */

#ifdef EMULATE_THREAD_SAFE_LOCALES

/* Perl_posix_LC_foo_
 *
 * Performs a single character classification or case-change operation on 'c',
 * bracketing it with LC_CTYPE_LOCK / LC_CTYPE_UNLOCK.
 *
 *  c         the character, passed as-is to the is_posix_FOO() or
 *            to_posix_FOO() macro that 'classnum' selects
 *  classnum  one of the CC_FOO_ values that has a case in the switch below
 *
 * For the classification classes the result is forced to 0 or 1 by the (bool)
 * cast.  For CC_TOLOWER_ and CC_TOUPPER_ it is whatever to_posix_LOWER() or
 * to_posix_UPPER() yields.  Any other 'classnum' drops the lock and then calls
 * locale_panic_().
 * NOTE(review): the default case relies on locale_panic_() not returning;
 * otherwise the lock would be released twice and 'retval' returned unset --
 * confirm it is declared noreturn. */
PERL_STATIC_INLINE int
Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum)
{
    int retval;

    /* The classification cases differ only in the class name, so stamp them
     * out from a single template */
#define POSIX_LC_CLASS_CASE_(name)                                          \
        case CC_ ## name ## _:                                              \
            retval = (bool) is_posix_ ## name(c);                           \
            break

    LC_CTYPE_LOCK;

    /* Every caller (so far) passes a 'classnum' that is a compile-time
     * constant, so the compiler should be able to reduce this switch to the
     * one assignment that applies */
    switch (classnum) {
        POSIX_LC_CLASS_CASE_(ALPHANUMERIC);
        POSIX_LC_CLASS_CASE_(ALPHA);
        POSIX_LC_CLASS_CASE_(ASCII);
        POSIX_LC_CLASS_CASE_(BLANK);
        POSIX_LC_CLASS_CASE_(CASED);
        POSIX_LC_CLASS_CASE_(CNTRL);
        POSIX_LC_CLASS_CASE_(DIGIT);
        POSIX_LC_CLASS_CASE_(GRAPH);
        POSIX_LC_CLASS_CASE_(LOWER);
        POSIX_LC_CLASS_CASE_(PRINT);
        POSIX_LC_CLASS_CASE_(PUNCT);
        POSIX_LC_CLASS_CASE_(SPACE);
        POSIX_LC_CLASS_CASE_(UPPER);
        POSIX_LC_CLASS_CASE_(WORDCHAR);
        POSIX_LC_CLASS_CASE_(XDIGIT);
        POSIX_LC_CLASS_CASE_(IDFIRST);

        /* The case changers return a character, not a truth value, so they
         * get no (bool) cast */
        case CC_TOLOWER_:
            retval = to_posix_LOWER(c);
            break;

        case CC_TOUPPER_:
            retval = to_posix_UPPER(c);
            break;

        default:
            /* Don't hold the lock while panicking */
            LC_CTYPE_UNLOCK;
            locale_panic_(Perl_form(aTHX_ "Unknown charclass %d", classnum));
    }

#undef POSIX_LC_CLASS_CASE_

    LC_CTYPE_UNLOCK;
    return retval;
}

#endif

/* ------------------------------- mg.h ------------------------------- */

#if defined(PERL_CORE) || defined(PERL_EXT)
Expand Down
8 changes: 8 additions & 0 deletions intrpvar.h
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,15 @@ PERLVARI(I, cur_locale_obj, locale_t, LC_GLOBAL_LOCALE)
* is almost always toggled into the C locale, and the locale it nominally is
* is stored as PL_numeric_name. */
PERLVARA(I, curlocales, LOCALE_CATEGORIES_COUNT_ + 1, const char *)
#endif
#ifdef EMULATE_THREAD_SAFE_LOCALES
/* Interpreter variables that exist only when EMULATE_THREAD_SAFE_LOCALES is
 * defined.  The two arrays each have LOCALE_CATEGORIES_COUNT_ elements;
 * NUMERIC_toggle_depth starts at 0.
 * NOTE(review): presumably 'restore_locale' records, per category, the locale
 * to switch back to, and 'restore_locale_depth' the matching nesting count --
 * confirm against the code that uses them. */
PERLVARA(I, restore_locale, LOCALE_CATEGORIES_COUNT_, const char *)
PERLVARA(I, restore_locale_depth, LOCALE_CATEGORIES_COUNT_, Size_t)
PERLVARI(I, NUMERIC_toggle_depth, int, 0)
#endif

/* 'perl_controls_locale' is initialized to true.  It is compiled in only for
 * USE_LOCALE builds that are either on WIN32 or do not define
 * USE_THREAD_SAFE_LOCALE. */
#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE))
PERLVARI(I, perl_controls_locale, bool, true)
#endif
#ifdef USE_PL_CUR_LC_ALL
PERLVARI(I, cur_LC_ALL, const char *, NULL)
Expand Down
Loading