llvm · michaelrj-google · Dec 3, 2024 · Sep 25, 2024 · Nov 14, 2024 · Nov 21, 2024
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h
@@ -178,9 +178,11 @@ class HighPrecisionDecimal {
       if (digit_index >= this->num_digits) {
         return new_digits - 1;
       }
-      if (this->digits[digit_index] != power_of_five[digit_index] - '0') {
+      if (this->digits[digit_index] !=
+          internal::b36_char_to_int(power_of_five[digit_index])) {
         return new_digits -
-               ((this->digits[digit_index] < power_of_five[digit_index] - '0')
+               ((this->digits[digit_index] <
+                 internal::b36_char_to_int(power_of_five[digit_index]))
                     ? 1
                     : 0);
       }
@@ -337,8 +339,8 @@ class HighPrecisionDecimal {
         }
         ++total_digits;
         if (this->num_digits < MAX_NUM_DIGITS) {
-          this->digits[this->num_digits] =
-              static_cast<uint8_t>(num_string[num_cur] - '0');
+          this->digits[this->num_digits] = static_cast<uint8_t>(
+              internal::b36_char_to_int(num_string[num_cur]));
           ++this->num_digits;
         } else if (num_string[num_cur] != '0') {
           this->truncated = true;

diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h
@@ -13,12 +13,13 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
 
-#include "src/__support/CPP/limits.h"        // CHAR_BIT
+#include "src/__support/CPP/limits.h" // CHAR_BIT
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"
-#include "src/__support/uint128.h"           // UInt128
-#include <stddef.h>                          // size_t
-#include <stdint.h>                          // uintxx_t
+#include "src/__support/uint128.h" // UInt128
+#include <stddef.h>                // size_t
+#include <stdint.h>                // uintxx_t
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -75,26 +76,13 @@ template <typename T, int base> struct DigitBuffer {
       push(*str);
   }
 
-  // Returns the digit for a particular character.
-  // Returns INVALID_DIGIT if the character is invalid.
-  LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) {
-    const auto to_lower = [](char c) { return c | 32; };
-    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
-    const auto is_alpha = [](char c) {
-      return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
-    };
-    if (is_digit(c))
-      return static_cast<uint8_t>(c - '0');
-    if (base > 10 && is_alpha(c))
-      return static_cast<uint8_t>(to_lower(c) - 'a' + 10);
-    return INVALID_DIGIT;
-  }
-
   // Adds a single character to this buffer.
   LIBC_INLINE constexpr void push(char c) {
     if (c == '\'')
       return; // ' is valid but not taken into account.
-    const uint8_t value = get_digit_value(c);
+    const int b36_val = internal::b36_char_to_int(c);
+    const uint8_t value = static_cast<uint8_t>(
+        b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT);
     if (value == INVALID_DIGIT || size >= MAX_DIGITS) {
       // During constant evaluation `__builtin_unreachable` will halt the
       // compiler as it is not executable. This is preferable over `assert` that

diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h
@@ -69,6 +69,7 @@
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
 #include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -214,9 +215,9 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
     using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
 
     LIBC_INLINE static char digit_char(uint8_t digit) {
-      if (digit < 10)
-        return '0' + static_cast<char>(digit);
-      return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
+      const int result = internal::int_to_b36_char(digit);
+      return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
+                                                 : result);
     }
 
     LIBC_INLINE static void

diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
@@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
       cpp::numeric_limits<StorageType>::max() / BASE;
   while (true) {
     if (isdigit(src[index])) {
-      uint32_t digit = src[index] - '0';
+      uint32_t digit = b36_char_to_int(src[index]);
       seen_digit = true;
 
       if (mantissa < bitstype_max_div_by_base) {

diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
@@ -42,22 +42,14 @@ first_non_whitespace(const char *__restrict src,
   return src + src_cur;
 }
 
-LIBC_INLINE int b36_char_to_int(char input) {
-  if (isdigit(input))
-    return input - '0';
-  if (isalpha(input))
-    return (input | 32) + 10 - 'a';
-  return 0;
-}
-
 // checks if the next 3 characters of the string pointer are the start of a
 // hexadecimal number. Does not advance the string pointer.
 LIBC_INLINE bool
 is_hex_start(const char *__restrict src,
              size_t src_len = cpp::numeric_limits<size_t>::max()) {
   if (src_len < 3)
     return false;
-  return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+  return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
          b36_char_to_int(*(src + 2)) < 16;
 }
 

diff --git a/libc/src/ctype/isxdigit.cpp b/libc/src/ctype/isxdigit.cpp
@@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) {
   const unsigned ch = static_cast<unsigned>(c);
-  return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
+  return static_cast<int>(internal::isalnum(ch) &&
+                          internal::b36_char_to_int(ch) < 16);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/ctype/isxdigit_l.cpp b/libc/src/ctype/isxdigit_l.cpp
@@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) {
   const unsigned ch = static_cast<unsigned>(c);
-  return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
+  return static_cast<int>(internal::isalnum(ch) &&
+                          internal::b36_char_to_int(ch) < 16);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/ctype/toupper.cpp b/libc/src/ctype/toupper.cpp
@@ -14,10 +14,6 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(int, toupper, (int c)) {
-  if (internal::islower(c))
-    return c - ('a' - 'A');
-  return c;
-}
+LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/ctype/toupper_l.cpp b/libc/src/ctype/toupper_l.cpp
@@ -15,9 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) {
-  if (internal::islower(c))
-    return c - ('a' - 'A');
-  return c;
+  return internal::toupper(c);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h
@@ -11,6 +11,7 @@
 
 #include "include/llvm-libc-macros/stdfix-macros.h"
 #include "src/__support/CPP/string_view.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/fixed_point/fx_bits.h"
 #include "src/__support/fixed_point/fx_rep.h"
 #include "src/__support/integer_to_string.h"
@@ -68,10 +69,6 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
   using LARep = fixed_point::FXRep<unsigned long accum>;
   using StorageType = LARep::StorageType;
 
-  // All of the letters will be defined relative to variable a, which will be
-  // the appropriate case based on the name of the conversion. This converts any
-  // conversion name into the letter 'a' with the appropriate case.
-  const char a = (to_conv.conv_name & 32) | 'A';
   FormatFlags flags = to_conv.flags;
 
   bool is_negative;
@@ -179,9 +176,9 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
     // unspecified.
     RoundDirection round;
     char first_digit_after = fraction_digits[precision];
-    if (first_digit_after > '5') {
+    if (internal::b36_char_to_int(first_digit_after) > 5) {
       round = RoundDirection::Up;
-    } else if (first_digit_after < '5') {
+    } else if (internal::b36_char_to_int(first_digit_after) < 5) {
       round = RoundDirection::Down;
     } else {
       // first_digit_after == '5'
@@ -204,7 +201,8 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
         keep_rounding = false;
         char cur_digit = fraction_digits[digit_to_round];
         // if the digit should not be rounded up
-        if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) {
+        if (round == RoundDirection::Even &&
+            (internal::b36_char_to_int(cur_digit) % 2) == 0) {
           // break out of the loop
           break;
         }
@@ -246,7 +244,7 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
   char sign_char = 0;
 
   // Check if the conv name is uppercase
-  if (a == 'A') {
+  if (internal::isupper(to_conv.conv_name)) {
     // These flags are only for signed conversions, so this removes them if the
     // conversion is unsigned.
     flags = FormatFlags(flags &

diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h
@@ -13,6 +13,7 @@
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/rounding_mode.h"
 #include "src/__support/big_int.h" // is_big_int_v
+#include "src/__support/ctype_utils.h"
 #include "src/__support/float_to_string.h"
 #include "src/__support/integer_to_string.h"
 #include "src/__support/libc_assert.h"
@@ -587,8 +588,6 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
   int exponent = float_bits.get_explicit_exponent();
   StorageType mantissa = float_bits.get_explicit_mantissa();
 
-  const char a = (to_conv.conv_name & 32) | 'A';
-
   char sign_char = 0;
 
   if (float_bits.is_neg())
@@ -734,7 +733,8 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
   round = get_round_direction(last_digit, truncated, float_bits.sign());
 
   RET_IF_RESULT_NEGATIVE(float_writer.write_last_block(
-      digits, maximum, round, final_exponent, a + 'E' - 'A'));
+      digits, maximum, round, final_exponent,
+      internal::islower(to_conv.conv_name) ? 'e' : 'E'));
 
   RET_IF_RESULT_NEGATIVE(float_writer.right_pad());
   return WRITE_OK;

diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h
@@ -12,6 +12,7 @@
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
@@ -28,10 +29,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
                                       const FormatSection &to_conv) {
   using LDBits = fputil::FPBits<long double>;
   using StorageType = LDBits::StorageType;
-  // All of the letters will be defined relative to variable a, which will be
-  // the appropriate case based on the name of the conversion. This converts any
-  // conversion name into the letter 'a' with the appropriate case.
-  const char a = (to_conv.conv_name & 32) | 'A';
 
   bool is_negative;
   int exponent;
@@ -138,9 +135,10 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
   size_t mant_cur = mant_len;
   size_t first_non_zero = 1;
   for (; mant_cur > 0; --mant_cur, mantissa >>= 4) {
-    char mant_mod_16 = static_cast<char>(mantissa) & 15;
-    char new_digit = static_cast<char>(
-        (mant_mod_16 > 9) ? (mant_mod_16 - 10 + a) : (mant_mod_16 + '0'));
+    char mant_mod_16 = static_cast<char>(mantissa % 16);
+    char new_digit = static_cast<char>(internal::int_to_b36_char(mant_mod_16));
+    if (internal::isupper(to_conv.conv_name))
+      new_digit = static_cast<char>(internal::toupper(new_digit));
     mant_buffer[mant_cur - 1] = new_digit;
     if (new_digit != '0' && first_non_zero < mant_cur)
       first_non_zero = mant_cur;
@@ -168,7 +166,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
 
   size_t exp_cur = EXP_LEN;
   for (; exponent > 0; --exp_cur, exponent /= 10) {
-    exp_buffer[exp_cur - 1] = static_cast<char>((exponent % 10) + '0');
+    exp_buffer[exp_cur - 1] =
+        static_cast<char>(internal::int_to_b36_char(exponent % 10));
   }
   if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0.
     exp_buffer[EXP_LEN - 1] = '0';
@@ -187,7 +186,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
   constexpr size_t PREFIX_LEN = 2;
   char prefix[PREFIX_LEN];
   prefix[0] = '0';
-  prefix[1] = a + ('x' - 'a');
+  prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X';
   const cpp::string_view prefix_str(prefix, PREFIX_LEN);
 
   // If the precision is greater than the actual result, pad with 0s
@@ -200,7 +199,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
   constexpr cpp::string_view HEXADECIMAL_POINT(".");
 
   // This is for the letter 'p' before the exponent.
-  const char exp_separator = a + ('p' - 'a');
+  const char exp_separator = internal::islower(to_conv.conv_name) ? 'p' : 'P';
   constexpr int EXP_SEPARATOR_LEN = 1;
 
   padding = static_cast<int>(to_conv.min_width - (sign_char > 0 ? 1 : 0) -

diff --git a/libc/src/stdio/printf_core/float_inf_nan_converter.h b/libc/src/stdio/printf_core/float_inf_nan_converter.h
@@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_INF_NAN_CONVERTER_H
 
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/ctype_utils.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
@@ -26,8 +27,6 @@ using StorageType = fputil::FPBits<long double>::StorageType;
 LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
   // All of the letters will be defined relative to variable a, which will be
   // the appropriate case based on the case of the conversion.
-  const char a = (to_conv.conv_name & 32) | 'A';
-
   bool is_negative;
   StorageType mantissa;
   if (to_conv.length_modifier == LengthModifier::L) {
@@ -66,9 +65,11 @@ LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
   if (sign_char)
     RET_IF_RESULT_NEGATIVE(writer->write(sign_char));
   if (mantissa == 0) { // inf
-    RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "inf" : "INF"));
+    RET_IF_RESULT_NEGATIVE(
+        writer->write(internal::islower(to_conv.conv_name) ? "inf" : "INF"));
   } else { // nan
-    RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "nan" : "NAN"));
+    RET_IF_RESULT_NEGATIVE(
+        writer->write(internal::islower(to_conv.conv_name) ? "nan" : "NAN"));
   }
 
   if (padding > 0 && ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==