Skip to content

Commit

Permalink
[libc++] Implement a libc++ private version of isascii (llvm#122361)
Browse files Browse the repository at this point in the history
The isascii() function is not standard, so we should avoid relying on
the platform providing it, especially since it's easy to implement in
libc++ portably.
  • Loading branch information
ldionne authored Jan 10, 2025
1 parent 749bdc8 commit 4c6ca3e
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 37 deletions.
10 changes: 6 additions & 4 deletions libcxx/include/__locale
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,8 @@ protected:
};
#endif // _LIBCPP_HAS_WIDE_CHARACTERS

inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_isascii(int __c) { return (__c & ~0x7F) == 0; }

template <>
class _LIBCPP_EXPORTED_FROM_ABI ctype<char> : public locale::facet, public ctype_base {
const mask* __tab_;
Expand All @@ -527,25 +529,25 @@ public:
explicit ctype(const mask* __tab = nullptr, bool __del = false, size_t __refs = 0);

_LIBCPP_HIDE_FROM_ABI bool is(mask __m, char_type __c) const {
return isascii(__c) ? (__tab_[static_cast<int>(__c)] & __m) != 0 : false;
return std::__libcpp_isascii(__c) ? (__tab_[static_cast<int>(__c)] & __m) != 0 : false;
}

_LIBCPP_HIDE_FROM_ABI const char_type* is(const char_type* __low, const char_type* __high, mask* __vec) const {
for (; __low != __high; ++__low, ++__vec)
*__vec = isascii(*__low) ? __tab_[static_cast<int>(*__low)] : 0;
*__vec = std::__libcpp_isascii(*__low) ? __tab_[static_cast<int>(*__low)] : 0;
return __low;
}

_LIBCPP_HIDE_FROM_ABI const char_type* scan_is(mask __m, const char_type* __low, const char_type* __high) const {
for (; __low != __high; ++__low)
if (isascii(*__low) && (__tab_[static_cast<int>(*__low)] & __m))
if (std::__libcpp_isascii(*__low) && (__tab_[static_cast<int>(*__low)] & __m))
break;
return __low;
}

_LIBCPP_HIDE_FROM_ABI const char_type* scan_not(mask __m, const char_type* __low, const char_type* __high) const {
for (; __low != __high; ++__low)
if (!isascii(*__low) || !(__tab_[static_cast<int>(*__low)] & __m))
if (!std::__libcpp_isascii(*__low) || !(__tab_[static_cast<int>(*__low)] & __m))
break;
return __low;
}
Expand Down
71 changes: 38 additions & 33 deletions libcxx/src/locale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -707,69 +707,70 @@ constinit locale::id ctype<wchar_t>::id;
ctype<wchar_t>::~ctype() {}

bool ctype<wchar_t>::do_is(mask m, char_type c) const {
return isascii(c) ? (ctype<char>::classic_table()[c] & m) != 0 : false;
return std::__libcpp_isascii(c) ? (ctype<char>::classic_table()[c] & m) != 0 : false;
}

const wchar_t* ctype<wchar_t>::do_is(const char_type* low, const char_type* high, mask* vec) const {
for (; low != high; ++low, ++vec)
*vec = static_cast<mask>(isascii(*low) ? ctype<char>::classic_table()[*low] : 0);
*vec = static_cast<mask>(std::__libcpp_isascii(*low) ? ctype<char>::classic_table()[*low] : 0);
return low;
}

const wchar_t* ctype<wchar_t>::do_scan_is(mask m, const char_type* low, const char_type* high) const {
for (; low != high; ++low)
if (isascii(*low) && (ctype<char>::classic_table()[*low] & m))
if (std::__libcpp_isascii(*low) && (ctype<char>::classic_table()[*low] & m))
break;
return low;
}

const wchar_t* ctype<wchar_t>::do_scan_not(mask m, const char_type* low, const char_type* high) const {
for (; low != high; ++low)
if (!(isascii(*low) && (ctype<char>::classic_table()[*low] & m)))
if (!(std::__libcpp_isascii(*low) && (ctype<char>::classic_table()[*low] & m)))
break;
return low;
}

wchar_t ctype<wchar_t>::do_toupper(char_type c) const {
# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
return std::__libcpp_isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
# else
return (isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c;
return (std::__libcpp_isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c;
# endif
}

const wchar_t* ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const {
for (; low != high; ++low)
# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
*low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
*low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
*low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low;
*low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low;
# else
*low = (isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low;
*low =
(std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low;
# endif
return low;
}

wchar_t ctype<wchar_t>::do_tolower(char_type c) const {
# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
return std::__libcpp_isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
# else
return (isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c;
return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c;
# endif
}

const wchar_t* ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const {
for (; low != high; ++low)
# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
*low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
*low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
*low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low;
*low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low;
# else
*low = (isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low;
*low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low;
# endif
return low;
}
Expand All @@ -783,14 +784,14 @@ const char* ctype<wchar_t>::do_widen(const char* low, const char* high, char_typ
}

char ctype<wchar_t>::do_narrow(char_type c, char dfault) const {
if (isascii(c))
if (std::__libcpp_isascii(c))
return static_cast<char>(c);
return dfault;
}

const wchar_t* ctype<wchar_t>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const {
for (; low != high; ++low, ++dest)
if (isascii(*low))
if (std::__libcpp_isascii(*low))
*dest = static_cast<char>(*low);
else
*dest = dfault;
Expand All @@ -816,52 +817,56 @@ ctype<char>::~ctype() {

char ctype<char>::do_toupper(char_type c) const {
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
return isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
#elif defined(__NetBSD__)
return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
return isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
return std::__libcpp_isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
#else
return (isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c;
return (std::__libcpp_isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c;
#endif
}

const char* ctype<char>::do_toupper(char_type* low, const char_type* high) const {
for (; low != high; ++low)
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
*low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
*low = std::__libcpp_isascii(*low)
? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)])
: *low;
#elif defined(__NetBSD__)
*low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
*low = isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
*low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
#else
*low = (isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low;
*low = (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low;
#endif
return low;
}

char ctype<char>::do_tolower(char_type c) const {
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
return isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
#elif defined(__NetBSD__)
return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
return isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
return std::__libcpp_isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
#else
return (isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c;
return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c;
#endif
}

const char* ctype<char>::do_tolower(char_type* low, const char_type* high) const {
for (; low != high; ++low)
#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
*low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
*low = std::__libcpp_isascii(*low)
? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)])
: *low;
#elif defined(__NetBSD__)
*low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
*low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
*low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
#else
*low = (isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low;
*low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low;
#endif
return low;
}
Expand All @@ -875,14 +880,14 @@ const char* ctype<char>::do_widen(const char* low, const char* high, char_type*
}

char ctype<char>::do_narrow(char_type c, char dfault) const {
if (isascii(c))
if (std::__libcpp_isascii(c))
return static_cast<char>(c);
return dfault;
}

const char* ctype<char>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const {
for (; low != high; ++low, ++dest)
if (isascii(*low))
if (std::__libcpp_isascii(*low))
*dest = *low;
else
*dest = dfault;
Expand Down Expand Up @@ -1140,7 +1145,7 @@ bool ctype_byname<wchar_t>::do_is(mask m, char_type c) const {

const wchar_t* ctype_byname<wchar_t>::do_is(const char_type* low, const char_type* high, mask* vec) const {
for (; low != high; ++low, ++vec) {
if (isascii(*low))
if (std::__libcpp_isascii(*low))
*vec = static_cast<mask>(ctype<char>::classic_table()[*low]);
else {
*vec = 0;
Expand Down

0 comments on commit 4c6ca3e

Please sign in to comment.