Skip to content

[libc] Change ctype to be encoding independent #110574

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
569 changes: 546 additions & 23 deletions libc/src/__support/ctype_utils.h

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions libc/src/__support/high_precision_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,11 @@ class HighPrecisionDecimal {
if (digit_index >= this->num_digits) {
return new_digits - 1;
}
if (this->digits[digit_index] != power_of_five[digit_index] - '0') {
if (this->digits[digit_index] !=
internal::b36_char_to_int(power_of_five[digit_index])) {
return new_digits -
((this->digits[digit_index] < power_of_five[digit_index] - '0')
((this->digits[digit_index] <
internal::b36_char_to_int(power_of_five[digit_index]))
? 1
: 0);
}
Expand Down Expand Up @@ -337,8 +339,8 @@ class HighPrecisionDecimal {
}
++total_digits;
if (this->num_digits < MAX_NUM_DIGITS) {
this->digits[this->num_digits] =
static_cast<uint8_t>(num_string[num_cur] - '0');
this->digits[this->num_digits] = static_cast<uint8_t>(
internal::b36_char_to_int(num_string[num_cur]));
++this->num_digits;
} else if (num_string[num_cur] != '0') {
this->truncated = true;
Expand Down
28 changes: 8 additions & 20 deletions libc/src/__support/integer_literals.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
#define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H

#include "src/__support/CPP/limits.h" // CHAR_BIT
#include "src/__support/CPP/limits.h" // CHAR_BIT
#include "src/__support/ctype_utils.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"
#include "src/__support/uint128.h" // UInt128
#include <stddef.h> // size_t
#include <stdint.h> // uintxx_t
#include "src/__support/uint128.h" // UInt128
#include <stddef.h> // size_t
#include <stdint.h> // uintxx_t

namespace LIBC_NAMESPACE_DECL {

Expand Down Expand Up @@ -75,26 +76,13 @@ template <typename T, int base> struct DigitBuffer {
push(*str);
}

// Returns the digit for a particular character.
// Returns INVALID_DIGIT if the character is invalid.
LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) {
const auto to_lower = [](char c) { return c | 32; };
const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
const auto is_alpha = [](char c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
};
if (is_digit(c))
return static_cast<uint8_t>(c - '0');
if (base > 10 && is_alpha(c))
return static_cast<uint8_t>(to_lower(c) - 'a' + 10);
return INVALID_DIGIT;
}

// Adds a single character to this buffer.
LIBC_INLINE constexpr void push(char c) {
if (c == '\'')
return; // ' is valid but not taken into account.
const uint8_t value = get_digit_value(c);
const int b36_val = internal::b36_char_to_int(c);
const uint8_t value = static_cast<uint8_t>(
b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT);
if (value == INVALID_DIGIT || size >= MAX_DIGITS) {
// During constant evaluation `__builtin_unreachable` will halt the
// compiler as it is not executable. This is preferable over `assert` that
Expand Down
7 changes: 4 additions & 3 deletions libc/src/__support/integer_to_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
#include "src/__support/CPP/type_traits.h"
#include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
#include "src/__support/common.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {
Expand Down Expand Up @@ -214,9 +215,9 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;

LIBC_INLINE static char digit_char(uint8_t digit) {
if (digit < 10)
return '0' + static_cast<char>(digit);
return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
const int result = internal::int_to_b36_char(digit);
return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
: result);
}

LIBC_INLINE static void
Expand Down
2 changes: 1 addition & 1 deletion libc/src/__support/str_to_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
cpp::numeric_limits<StorageType>::max() / BASE;
while (true) {
if (isdigit(src[index])) {
uint32_t digit = src[index] - '0';
uint32_t digit = b36_char_to_int(src[index]);
seen_digit = true;

if (mantissa < bitstype_max_div_by_base) {
Expand Down
10 changes: 1 addition & 9 deletions libc/src/__support/str_to_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,14 @@ first_non_whitespace(const char *__restrict src,
return src + src_cur;
}

LIBC_INLINE int b36_char_to_int(char input) {
if (isdigit(input))
return input - '0';
if (isalpha(input))
return (input | 32) + 10 - 'a';
return 0;
}

// checks if the next 3 characters of the string pointer are the start of a
// hexadecimal number. Does not advance the string pointer.
LIBC_INLINE bool
is_hex_start(const char *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
if (src_len < 3)
return false;
return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
b36_char_to_int(*(src + 2)) < 16;
}

Expand Down
3 changes: 2 additions & 1 deletion libc/src/ctype/isxdigit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) {
const unsigned ch = static_cast<unsigned>(c);
return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
return static_cast<int>(internal::isalnum(ch) &&
internal::b36_char_to_int(ch) < 16);
}

} // namespace LIBC_NAMESPACE_DECL
3 changes: 2 additions & 1 deletion libc/src/ctype/isxdigit_l.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) {
const unsigned ch = static_cast<unsigned>(c);
return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
return static_cast<int>(internal::isalnum(ch) &&
internal::b36_char_to_int(ch) < 16);
}

} // namespace LIBC_NAMESPACE_DECL
6 changes: 1 addition & 5 deletions libc/src/ctype/toupper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, toupper, (int c)) {
if (internal::islower(c))
return c - ('a' - 'A');
return c;
}
LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); }

} // namespace LIBC_NAMESPACE_DECL
4 changes: 1 addition & 3 deletions libc/src/ctype/toupper_l.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) {
if (internal::islower(c))
return c - ('a' - 'A');
return c;
return internal::toupper(c);
}

} // namespace LIBC_NAMESPACE_DECL
14 changes: 6 additions & 8 deletions libc/src/stdio/printf_core/fixed_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "include/llvm-libc-macros/stdfix-macros.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/fixed_point/fx_bits.h"
#include "src/__support/fixed_point/fx_rep.h"
#include "src/__support/integer_to_string.h"
Expand Down Expand Up @@ -68,10 +69,6 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
using LARep = fixed_point::FXRep<unsigned long accum>;
using StorageType = LARep::StorageType;

// All of the letters will be defined relative to variable a, which will be
// the appropriate case based on the name of the conversion. This converts any
// conversion name into the letter 'a' with the appropriate case.
const char a = (to_conv.conv_name & 32) | 'A';
FormatFlags flags = to_conv.flags;

bool is_negative;
Expand Down Expand Up @@ -179,9 +176,9 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
// unspecified.
RoundDirection round;
char first_digit_after = fraction_digits[precision];
if (first_digit_after > '5') {
if (internal::b36_char_to_int(first_digit_after) > 5) {
round = RoundDirection::Up;
} else if (first_digit_after < '5') {
} else if (internal::b36_char_to_int(first_digit_after) < 5) {
round = RoundDirection::Down;
} else {
// first_digit_after == '5'
Expand All @@ -204,7 +201,8 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
keep_rounding = false;
char cur_digit = fraction_digits[digit_to_round];
// if the digit should not be rounded up
if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) {
if (round == RoundDirection::Even &&
(internal::b36_char_to_int(cur_digit) % 2) == 0) {
// break out of the loop
break;
}
Expand Down Expand Up @@ -246,7 +244,7 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
char sign_char = 0;

// Check if the conv name is uppercase
if (a == 'A') {
if (internal::isupper(to_conv.conv_name)) {
// These flags are only for signed conversions, so this removes them if the
// conversion is unsigned.
flags = FormatFlags(flags &
Expand Down
6 changes: 3 additions & 3 deletions libc/src/stdio/printf_core/float_dec_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/big_int.h" // is_big_int_v
#include "src/__support/ctype_utils.h"
#include "src/__support/float_to_string.h"
#include "src/__support/integer_to_string.h"
#include "src/__support/libc_assert.h"
Expand Down Expand Up @@ -587,8 +588,6 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
int exponent = float_bits.get_explicit_exponent();
StorageType mantissa = float_bits.get_explicit_mantissa();

const char a = (to_conv.conv_name & 32) | 'A';

char sign_char = 0;

if (float_bits.is_neg())
Expand Down Expand Up @@ -734,7 +733,8 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer,
round = get_round_direction(last_digit, truncated, float_bits.sign());

RET_IF_RESULT_NEGATIVE(float_writer.write_last_block(
digits, maximum, round, final_exponent, a + 'E' - 'A'));
digits, maximum, round, final_exponent,
internal::islower(to_conv.conv_name) ? 'e' : 'E'));

RET_IF_RESULT_NEGATIVE(float_writer.right_pad());
return WRITE_OK;
Expand Down
19 changes: 9 additions & 10 deletions libc/src/stdio/printf_core/float_hex_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "src/__support/CPP/string_view.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
Expand All @@ -28,10 +29,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
const FormatSection &to_conv) {
using LDBits = fputil::FPBits<long double>;
using StorageType = LDBits::StorageType;
// All of the letters will be defined relative to variable a, which will be
// the appropriate case based on the name of the conversion. This converts any
// conversion name into the letter 'a' with the appropriate case.
const char a = (to_conv.conv_name & 32) | 'A';

bool is_negative;
int exponent;
Expand Down Expand Up @@ -138,9 +135,10 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
size_t mant_cur = mant_len;
size_t first_non_zero = 1;
for (; mant_cur > 0; --mant_cur, mantissa >>= 4) {
char mant_mod_16 = static_cast<char>(mantissa) & 15;
char new_digit = static_cast<char>(
(mant_mod_16 > 9) ? (mant_mod_16 - 10 + a) : (mant_mod_16 + '0'));
char mant_mod_16 = static_cast<char>(mantissa % 16);
char new_digit = static_cast<char>(internal::int_to_b36_char(mant_mod_16));
if (internal::isupper(to_conv.conv_name))
new_digit = static_cast<char>(internal::toupper(new_digit));
mant_buffer[mant_cur - 1] = new_digit;
if (new_digit != '0' && first_non_zero < mant_cur)
first_non_zero = mant_cur;
Expand Down Expand Up @@ -168,7 +166,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,

size_t exp_cur = EXP_LEN;
for (; exponent > 0; --exp_cur, exponent /= 10) {
exp_buffer[exp_cur - 1] = static_cast<char>((exponent % 10) + '0');
exp_buffer[exp_cur - 1] =
static_cast<char>(internal::int_to_b36_char(exponent % 10));
}
if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0.
exp_buffer[EXP_LEN - 1] = '0';
Expand All @@ -187,7 +186,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
constexpr size_t PREFIX_LEN = 2;
char prefix[PREFIX_LEN];
prefix[0] = '0';
prefix[1] = a + ('x' - 'a');
prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X';
const cpp::string_view prefix_str(prefix, PREFIX_LEN);

// If the precision is greater than the actual result, pad with 0s
Expand All @@ -200,7 +199,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
constexpr cpp::string_view HEXADECIMAL_POINT(".");

// This is for the letter 'p' before the exponent.
const char exp_separator = a + ('p' - 'a');
const char exp_separator = internal::islower(to_conv.conv_name) ? 'p' : 'P';
constexpr int EXP_SEPARATOR_LEN = 1;

padding = static_cast<int>(to_conv.min_width - (sign_char > 0 ? 1 : 0) -
Expand Down
9 changes: 5 additions & 4 deletions libc/src/stdio/printf_core/float_inf_nan_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_INF_NAN_CONVERTER_H

#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
Expand All @@ -26,8 +27,6 @@ using StorageType = fputil::FPBits<long double>::StorageType;
LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
// The output text follows the case of the conversion name: a lowercase
// conversion writes "inf"/"nan", an uppercase one writes "INF"/"NAN".
const char a = (to_conv.conv_name & 32) | 'A';

bool is_negative;
StorageType mantissa;
if (to_conv.length_modifier == LengthModifier::L) {
Expand Down Expand Up @@ -66,9 +65,11 @@ LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) {
if (sign_char)
RET_IF_RESULT_NEGATIVE(writer->write(sign_char));
if (mantissa == 0) { // inf
RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "inf" : "INF"));
RET_IF_RESULT_NEGATIVE(
writer->write(internal::islower(to_conv.conv_name) ? "inf" : "INF"));
} else { // nan
RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "nan" : "NAN"));
RET_IF_RESULT_NEGATIVE(
writer->write(internal::islower(to_conv.conv_name) ? "nan" : "NAN"));
}

if (padding > 0 && ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
Expand Down
Loading
Loading