Skip to content

wtf8, char: Replace uses of mem::transmute with more specific functions #27233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 25, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,18 @@ pub fn from_u32(i: u32) -> Option<char> {
if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
None
} else {
Some(unsafe { transmute(i) })
Some(unsafe { from_u32_unchecked(i) })
}
}

/// Converts a `u32` to a `char`, not checking whether it is a valid Unicode
/// code point.
#[inline]
#[unstable(feature = "char_from_unchecked", reason = "recently added API")]
pub unsafe fn from_u32_unchecked(i: u32) -> char {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add an explicit #[unstable] annotation to this function? It should have its own feature name as well.

transmute(i)
}

/// Converts a number to the character representing it.
///
/// # Return value
Expand Down Expand Up @@ -115,12 +123,11 @@ pub fn from_digit(num: u32, radix: u32) -> Option<char> {
panic!("from_digit: radix is too high (maximum 36)");
}
if num < radix {
unsafe {
if num < 10 {
Some(transmute('0' as u32 + num))
} else {
Some(transmute('a' as u32 + num - 10))
}
let num = num as u8;
if num < 10 {
Some((b'0' + num) as char)
} else {
Some((b'a' + num - 10) as char)
}
} else {
None
Expand Down Expand Up @@ -318,16 +325,13 @@ impl Iterator for EscapeUnicode {
Some('{')
}
EscapeUnicodeState::Value(offset) => {
let v = match ((self.c as i32) >> (offset * 4)) & 0xf {
i @ 0 ... 9 => '0' as i32 + i,
i => 'a' as i32 + (i - 10)
};
let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap();
if offset == 0 {
self.state = EscapeUnicodeState::RightBrace;
} else {
self.state = EscapeUnicodeState::Value(offset - 1);
}
Some(unsafe { transmute(v) })
Some(c)
}
EscapeUnicodeState::RightBrace => {
self.state = EscapeUnicodeState::Done;
Expand Down
2 changes: 1 addition & 1 deletion src/librustc_unicode/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use core::iter::Iterator;
use tables::{derived_property, property, general_category, conversions, charwidth};

// stable reexports
pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};
pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicode, EscapeDefault};

// unstable reexports
#[allow(deprecated)]
Expand Down
1 change: 1 addition & 0 deletions src/libstd/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@
#![feature(borrow_state)]
#![feature(box_raw)]
#![feature(box_syntax)]
#![feature(char_from_unchecked)]
#![feature(char_internals)]
#![feature(clone_from_slice)]
#![feature(collections)]
Expand Down
31 changes: 19 additions & 12 deletions src/libstd/sys/common/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,18 @@ use core::str::next_code_point;

use ascii::*;
use borrow::Cow;
use char;
use cmp;
use fmt;
use hash::{Hash, Hasher};
use iter::FromIterator;
use mem;
use ops;
use rustc_unicode::str::{Utf16Item, utf16_items};
use slice;
use str;
use string::String;
use sys_common::AsInner;
use rustc_unicode::str::{Utf16Item, utf16_items};
use vec::Vec;

const UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD";
Expand Down Expand Up @@ -107,7 +108,7 @@ impl CodePoint {
pub fn to_char(&self) -> Option<char> {
match self.value {
0xD800 ... 0xDFFF => None,
_ => Some(unsafe { mem::transmute(self.value) })
_ => Some(unsafe { char::from_u32_unchecked(self.value) })
}
}

Expand Down Expand Up @@ -213,18 +214,16 @@ impl Wtf8Buf {
// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
let slice = slice::from_raw_parts_mut(
self.bytes.as_mut_ptr().offset(cur_len as isize),
4
self.bytes.as_mut_ptr().offset(cur_len as isize), 4
);
let used = encode_utf8_raw(code_point.value, mem::transmute(slice))
.unwrap_or(0);
let used = encode_utf8_raw(code_point.value, slice).unwrap();
self.bytes.set_len(cur_len + used);
}
}

#[inline]
pub fn as_slice(&self) -> &Wtf8 {
unsafe { mem::transmute(&*self.bytes) }
unsafe { Wtf8::from_bytes_unchecked(&self.bytes) }
}

/// Reserves capacity for at least `additional` more bytes to be inserted
Expand Down Expand Up @@ -457,7 +456,16 @@ impl Wtf8 {
/// Since WTF-8 is a superset of UTF-8, this always succeeds.
#[inline]
pub fn from_str(value: &str) -> &Wtf8 {
unsafe { mem::transmute(value.as_bytes()) }
unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) }
}

/// Creates a WTF-8 slice from a WTF-8 byte slice.
///
/// Since the byte slice is not checked for valid WTF-8, this function is
/// marked unsafe.
#[inline]
unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
mem::transmute(value)
}

/// Returns the length, in WTF-8 bytes.
Expand Down Expand Up @@ -682,7 +690,7 @@ fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
#[inline]
fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32);
unsafe { mem::transmute(code_point) }
unsafe { char::from_u32_unchecked(code_point) }
}

/// Copied from core::str::StrPrelude::is_char_boundary
Expand All @@ -699,7 +707,7 @@ pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
#[inline]
pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
// memory layout of an &[u8] and &Wtf8 are the same
mem::transmute(slice::from_raw_parts(
Wtf8::from_bytes_unchecked(slice::from_raw_parts(
s.bytes.as_ptr().offset(begin as isize),
end - begin
))
Expand Down Expand Up @@ -821,7 +829,6 @@ mod tests {
use prelude::v1::*;
use borrow::Cow;
use super::*;
use mem::transmute;

#[test]
fn code_point_from_u32() {
Expand Down Expand Up @@ -962,7 +969,7 @@ mod tests {
string.push_wtf8(Wtf8::from_str(" 💩"));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");

fn w(value: &[u8]) -> &Wtf8 { unsafe { transmute(value) } }
fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } }

let mut string = Wtf8Buf::new();
string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
Expand Down