Skip to content

Commit 38d6eae

Browse files
authored
fix: ubsan signed overflow violations (#347)
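In these instances, the uint8_t input is promoted and multiplied by the unsuffixed literal 0x101010101010101, which has a signed 64-bit type (long or long long, depending on the platform), so the multiplication is carried out in signed arithmetic. For inputs of 0x80 or greater, the product exceeds the largest signed 64-bit value and overflows, which is undefined behavior even though the lambda's return type is unsigned. Fix by adding the ull suffix to the literal so that the multiplication is always unsigned in the first place.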
1 parent 99f3f4a commit 38d6eae

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

src/ada_idna.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2632,7 +2632,9 @@ uint32_t find_range_index(uint32_t key) {
26322632
}
26332633

26342634
bool ascii_has_upper_case(char* input, size_t length) {
2635-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
2635+
auto broadcast = [](uint8_t v) -> uint64_t {
2636+
return 0x101010101010101ull * v;
2637+
};
26362638
uint64_t broadcast_80 = broadcast(0x80);
26372639
uint64_t broadcast_Ap = broadcast(128 - 'A');
26382640
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -2654,7 +2656,9 @@ bool ascii_has_upper_case(char* input, size_t length) {
26542656
}
26552657

26562658
void ascii_map(char* input, size_t length) {
2657-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
2659+
auto broadcast = [](uint8_t v) -> uint64_t {
2660+
return 0x101010101010101ull * v;
2661+
};
26582662
uint64_t broadcast_80 = broadcast(0x80);
26592663
uint64_t broadcast_Ap = broadcast(128 - 'A');
26602664
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);

src/helpers.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,9 @@ ada_really_inline size_t find_next_host_delimiter_special(
210210
auto index_of_first_set_byte = [](uint64_t v) {
211211
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
212212
};
213-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
213+
auto broadcast = [](uint8_t v) -> uint64_t {
214+
return 0x101010101010101ull * v;
215+
};
214216
size_t i = location;
215217
uint64_t mask1 = broadcast(':');
216218
uint64_t mask2 = broadcast('/');
@@ -273,7 +275,9 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
273275
auto index_of_first_set_byte = [](uint64_t v) {
274276
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
275277
};
276-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
278+
auto broadcast = [](uint8_t v) -> uint64_t {
279+
return 0x101010101010101ull * v;
280+
};
277281
size_t i = location;
278282
uint64_t mask1 = broadcast(':');
279283
uint64_t mask2 = broadcast('/');
@@ -599,7 +603,9 @@ find_authority_delimiter_special(std::string_view view) noexcept {
599603
auto index_of_first_set_byte = [](uint64_t v) {
600604
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
601605
};
602-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
606+
auto broadcast = [](uint8_t v) -> uint64_t {
607+
return 0x101010101010101ull * v;
608+
};
603609
size_t i = 0;
604610
uint64_t mask1 = broadcast('@');
605611
uint64_t mask2 = broadcast('/');
@@ -647,7 +653,9 @@ find_authority_delimiter(std::string_view view) noexcept {
647653
auto index_of_first_set_byte = [](uint64_t v) {
648654
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
649655
};
650-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
656+
auto broadcast = [](uint8_t v) -> uint64_t {
657+
return 0x101010101010101ull * v;
658+
};
651659
size_t i = 0;
652660
uint64_t mask1 = broadcast('@');
653661
uint64_t mask2 = broadcast('/');

src/unicode.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ ADA_POP_DISABLE_WARNINGS
1212
namespace ada::unicode {
1313

1414
constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
15-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
15+
auto broadcast = [](uint8_t v) -> uint64_t {
16+
return 0x101010101010101ull * v;
17+
};
1618
uint64_t broadcast_80 = broadcast(0x80);
1719
uint64_t broadcast_Ap = broadcast(128 - 'A');
1820
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -43,7 +45,9 @@ ada_really_inline constexpr bool has_tabs_or_newline(
4345
auto has_zero_byte = [](uint64_t v) {
4446
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
4547
};
46-
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
48+
auto broadcast = [](uint8_t v) -> uint64_t {
49+
return 0x101010101010101ull * v;
50+
};
4751
size_t i = 0;
4852
uint64_t mask1 = broadcast('\r');
4953
uint64_t mask2 = broadcast('\n');

0 commit comments

Comments
 (0)