Skip to content

Rollup of 7 pull requests #111283

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f0be145
drive-by cleanup of rustdoc comment
jyn514 Apr 29, 2023
2469afe
Make the BUG_REPORT_URL configurable by tools
jyn514 Apr 29, 2023
7dd59fc
Add Drop terminator to SMIR
spastorino Apr 20, 2023
10b69dd
debuginfo: split method declaration and definition
cuviper May 3, 2023
964fb67
Use fulfillment to check Drop impl compatibility
compiler-errors Apr 20, 2023
9d44f9b
Add test for #110557
compiler-errors Apr 20, 2023
2e346b6
Even more tests
compiler-errors Apr 21, 2023
370d31b
Constify `[u8]::is_ascii` (unstably)
scottmcm May 4, 2023
1cfcf71
Add an example that depends on `is_ascii` in a `const`
scottmcm May 4, 2023
9de3d01
Tune the `is_ascii` implementation used for short slices
scottmcm May 5, 2023
4b85bea
Add Assert terminator to SMIR
spastorino Apr 24, 2023
698acc6
Add GeneratorDrop terminator to SMIR
spastorino Apr 24, 2023
a183ac6
add hint for =< as <=
zacklukem May 5, 2023
2a1ef34
More robust debug assertions for `Instance::resolve` on built-in trai…
compiler-errors May 6, 2023
5c4529b
Rollup merge of #110577 - compiler-errors:drop-impl-fulfill, r=lcnr
matthiaskrgr May 6, 2023
877d938
Rollup merge of #110610 - spastorino:smir-terminator, r=oli-obk
matthiaskrgr May 6, 2023
b3cafff
Rollup merge of #110989 - jyn514:bug-report-url, r=WaffleLapkin
matthiaskrgr May 6, 2023
d3157b6
Rollup merge of #111167 - cuviper:type-decl-disubprogram, r=michaelwo…
matthiaskrgr May 6, 2023
0c72a35
Rollup merge of #111222 - scottmcm:constify-is_ascii, r=thomcc
matthiaskrgr May 6, 2023
a08e689
Rollup merge of #111230 - zacklukem:eq-less-to-less-eq, r=compiler-er…
matthiaskrgr May 6, 2023
1783a35
Rollup merge of #111279 - compiler-errors:core-item-resolve, r=cjgillot
matthiaskrgr May 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion library/core/src/array/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,23 @@ use crate::ascii;
impl<const N: usize> [u8; N] {
/// Converts this array of bytes into an array of ASCII characters,
/// or returns `None` if any of the characters is non-ASCII.
///
/// # Examples
///
/// ```
/// #![feature(ascii_char)]
/// #![feature(const_option)]
///
/// const HEX_DIGITS: [std::ascii::Char; 16] =
/// *b"0123456789abcdef".as_ascii().unwrap();
///
/// assert_eq!(HEX_DIGITS[1].as_str(), "1");
/// assert_eq!(HEX_DIGITS[10].as_str(), "a");
/// ```
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
pub const fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
if self.is_ascii() {
// SAFETY: Just checked that it's ASCII
Some(unsafe { self.as_ascii_unchecked() })
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
#![feature(const_slice_from_raw_parts_mut)]
#![feature(const_slice_from_ref)]
#![feature(const_slice_index)]
#![feature(const_slice_is_ascii)]
#![feature(const_slice_ptr_len)]
#![feature(const_slice_split_at_mut)]
#![feature(const_str_from_utf8_unchecked_mut)]
Expand Down
51 changes: 39 additions & 12 deletions library/core/src/slice/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ use crate::ops;
impl [u8] {
/// Checks if all bytes in this slice are within the ASCII range.
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
#[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
#[must_use]
#[inline]
pub fn is_ascii(&self) -> bool {
pub const fn is_ascii(&self) -> bool {
is_ascii(self)
}

Expand All @@ -21,7 +22,7 @@ impl [u8] {
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
if self.is_ascii() {
// SAFETY: Just checked that it's ASCII
Some(unsafe { self.as_ascii_unchecked() })
Expand Down Expand Up @@ -262,11 +263,29 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
/// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
/// from `../str/mod.rs`, which does something similar for utf8 validation.
#[inline]
fn contains_nonascii(v: usize) -> bool {
const fn contains_nonascii(v: usize) -> bool {
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
(NONASCII_MASK & v) != 0
}

/// ASCII test *without* the chunk-at-a-time optimizations.
///
/// Returns `true` when every byte of `bytes` is ASCII, which includes the
/// empty slice. The slice is examined one byte at a time, starting from the
/// last byte and working towards the front.
///
/// This is carefully structured to produce nice small code -- it's smaller in
/// `-O` than what the "obvious" ways produce under `-C opt-level=s`. If you
/// touch it, be sure to run (and update if needed) the assembly test.
#[unstable(feature = "str_internals", issue = "none")]
#[doc(hidden)]
#[inline]
pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
// Peel the final byte off the slice for as long as that byte is ASCII.
while let [rest @ .., last] = bytes {
if !last.is_ascii() {
// Leave the offending byte in `bytes` so the emptiness test below fails.
break;
}
bytes = rest;
}
// Nothing is left only if every byte passed the check (or there were none).
bytes.is_empty()
}

/// Optimized ASCII test that will use usize-at-a-time operations instead of
/// byte-at-a-time operations (when possible).
///
Expand All @@ -280,7 +299,7 @@ fn contains_nonascii(v: usize) -> bool {
/// If any of these loads produces something for which `contains_nonascii`
/// (above) returns true, then we know the answer is false.
#[inline]
fn is_ascii(s: &[u8]) -> bool {
const fn is_ascii(s: &[u8]) -> bool {
const USIZE_SIZE: usize = mem::size_of::<usize>();

let len = s.len();
Expand All @@ -292,7 +311,7 @@ fn is_ascii(s: &[u8]) -> bool {
// We also do this for architectures where `size_of::<usize>()` isn't
// sufficient alignment for `usize`, because it's a weird edge case.
if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem::align_of::<usize>() {
return s.iter().all(|b| b.is_ascii());
return is_ascii_simple(s);
}

// We always read the first word unaligned, which means `align_offset` is
Expand Down Expand Up @@ -321,18 +340,26 @@ fn is_ascii(s: &[u8]) -> bool {
// Paranoia check about alignment, since we're about to do a bunch of
// unaligned loads. In practice this should be impossible barring a bug in
// `align_offset` though.
debug_assert_eq!(word_ptr.addr() % mem::align_of::<usize>(), 0);
// While this method is allowed to spuriously fail in CTFE, if it doesn't
// have alignment information it should have given a `usize::MAX` for
// `align_offset` earlier, sending things through the scalar path instead of
// this one, so this check should pass if it's reachable.
debug_assert!(word_ptr.is_aligned_to(mem::align_of::<usize>()));

// Read subsequent words until the last aligned word, excluding the last
// aligned word itself, which is handled by the tail check later. This
// ensures the tail is always at most one `usize`, avoiding an extra
// `byte_pos == len` branch.
while byte_pos < len - USIZE_SIZE {
debug_assert!(
// Sanity check that the read is in bounds
(word_ptr.addr() + USIZE_SIZE) <= start.addr().wrapping_add(len) &&
// And that our assumptions about `byte_pos` hold.
(word_ptr.addr() - start.addr()) == byte_pos
);
// Sanity check that the read is in bounds
debug_assert!(byte_pos + USIZE_SIZE <= len);
// And that our assumptions about `byte_pos` hold.
debug_assert!(matches!(
word_ptr.cast::<u8>().guaranteed_eq(start.wrapping_add(byte_pos)),
// These are from the same allocation, so will hopefully always be
// known to match even in CTFE, but if it refuses to compare them
// that's ok since it's just a debug check anyway.
None | Some(true),
));

// SAFETY: We know `word_ptr` is properly aligned (because of
// `align_offset`), and we know that we have enough bytes between `word_ptr` and the end
Expand Down
4 changes: 4 additions & 0 deletions library/core/src/slice/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ mod raw;
mod rotate;
mod specialize;

#[unstable(feature = "str_internals", issue = "none")]
#[doc(hidden)]
pub use ascii::is_ascii_simple;

#[stable(feature = "rust1", since = "1.0.0")]
pub use iter::{Chunks, ChunksMut, Windows};
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
5 changes: 3 additions & 2 deletions library/core/src/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2358,9 +2358,10 @@ impl str {
/// assert!(!non_ascii.is_ascii());
/// ```
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
#[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
#[must_use]
#[inline]
pub fn is_ascii(&self) -> bool {
pub const fn is_ascii(&self) -> bool {
// We can treat each byte as character here: all multibyte characters
// start with a byte that is not in the ASCII range, so we will stop
// there already.
Expand All @@ -2372,7 +2373,7 @@ impl str {
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
// Like in `is_ascii`, we can work on the bytes directly.
self.as_bytes().as_ascii()
}
Expand Down
32 changes: 32 additions & 0 deletions tests/assembly/slice-is_ascii.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// assembly-output: emit-asm
// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel
// no-system-llvm
// only-x86_64
// ignore-sgx
// ignore-debug

#![feature(str_internals)]

// CHECK-LABEL: is_ascii_simple_demo:
// Thin wrapper that forwards to `core::slice::is_ascii_simple` under an
// unmangled symbol name, so the emitted assembly for that symbol can be
// matched against the FileCheck directives written inside the body.
#[no_mangle]
pub fn is_ascii_simple_demo(bytes: &[u8]) -> bool {
// Linux (System V): pointer is rdi; length is rsi
// Windows: pointer is rcx; length is rdx.

// CHECK-NOT: mov
// CHECK-NOT: test
// CHECK-NOT: cmp

// CHECK: .[[LOOPHEAD:.+]]:
// CHECK-NEXT: mov [[TEMP:.+]], [[LEN:rsi|rdx]]
// CHECK-NEXT: sub [[LEN]], 1
// CHECK-NEXT: jb .[[LOOPEXIT:.+]]
// CHECK-NEXT: cmp byte ptr [{{rdi|rcx}} + [[TEMP]] - 1], 0
// CHECK-NEXT: jns .[[LOOPHEAD]]

// CHECK-NEXT: .[[LOOPEXIT]]:
// CHECK-NEXT: test [[TEMP]], [[TEMP]]
// CHECK-NEXT: sete al
// CHECK-NEXT: ret
core::slice::is_ascii_simple(bytes)
}