Skip to content

Commit d126d23

Browse files
nbdd0121 authored and ojeda committed
rust: str: add CStr type
Add the `CStr` type, which is a borrowed string that is guaranteed to have exactly one `NUL` byte, which is at the end. It is used for interoperability with kernel APIs that take C strings. Add it to the prelude too. Co-developed-by: Alex Gaynor <alex.gaynor@gmail.com> Signed-off-by: Alex Gaynor <alex.gaynor@gmail.com> Co-developed-by: Milan Landaverde <milan@mdaverde.com> Signed-off-by: Milan Landaverde <milan@mdaverde.com> Signed-off-by: Gary Guo <gary@garyguo.net> [Reworded, adapted for upstream and applied latest changes] Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
1 parent 650ec51 commit d126d23

File tree

2 files changed

+170
-1
lines changed

2 files changed

+170
-1
lines changed

rust/kernel/prelude.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notic
2121

2222
pub use super::error::{code::*, Error, Result};
2323

24-
pub use super::ThisModule;
24+
pub use super::{str::CStr, ThisModule};

rust/kernel/str.rs

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44
55
use core::fmt;
66

7+
use crate::{
8+
bindings,
9+
error::{code::*, Error},
10+
};
11+
712
/// Byte string without UTF-8 validity guarantee.
813
///
914
/// `BStr` is simply an alias to `[u8]`, but has a more evident semantical meaning.
@@ -30,6 +35,170 @@ macro_rules! b_str {
3035
}};
3136
}
3237

38+
/// Possible errors when using conversion functions in [`CStr`].
///
/// Each variant names the one requirement that the supplied bytes failed to
/// meet. The enum carries no data, so values are cheap to copy and can be
/// compared directly with `==` (for example in `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CStrConvertError {
    /// Supplied bytes contain an interior `NUL`.
    InteriorNul,

    /// Supplied bytes are not terminated by `NUL`.
    NotNulTerminated,
}
47+
48+
impl From<CStrConvertError> for Error {
49+
#[inline]
50+
fn from(_: CStrConvertError) -> Error {
51+
EINVAL
52+
}
53+
}
54+
55+
/// A string that is guaranteed to have exactly one `NUL` byte, which is at the
56+
/// end.
57+
///
58+
/// Used for interoperability with kernel APIs that take C strings.
59+
#[repr(transparent)]
60+
pub struct CStr([u8]);
61+
62+
impl CStr {
63+
/// Returns the length of this string excluding `NUL`.
64+
#[inline]
65+
pub const fn len(&self) -> usize {
66+
self.len_with_nul() - 1
67+
}
68+
69+
/// Returns the length of this string with `NUL`.
70+
#[inline]
71+
pub const fn len_with_nul(&self) -> usize {
72+
// SAFETY: This is one of the invariant of `CStr`.
73+
// We add a `unreachable_unchecked` here to hint the optimizer that
74+
// the value returned from this function is non-zero.
75+
if self.0.is_empty() {
76+
unsafe { core::hint::unreachable_unchecked() };
77+
}
78+
self.0.len()
79+
}
80+
81+
/// Returns `true` if the string only includes `NUL`.
82+
#[inline]
83+
pub const fn is_empty(&self) -> bool {
84+
self.len() == 0
85+
}
86+
87+
/// Wraps a raw C string pointer.
88+
///
89+
/// # Safety
90+
///
91+
/// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must
92+
/// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr`
93+
/// must not be mutated.
94+
#[inline]
95+
pub unsafe fn from_char_ptr<'a>(ptr: *const core::ffi::c_char) -> &'a Self {
96+
// SAFETY: The safety precondition guarantees `ptr` is a valid pointer
97+
// to a `NUL`-terminated C string.
98+
let len = unsafe { bindings::strlen(ptr) } + 1;
99+
// SAFETY: Lifetime guaranteed by the safety precondition.
100+
let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len as _) };
101+
// SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`.
102+
// As we have added 1 to `len`, the last byte is known to be `NUL`.
103+
unsafe { Self::from_bytes_with_nul_unchecked(bytes) }
104+
}
105+
106+
/// Creates a [`CStr`] from a `[u8]`.
107+
///
108+
/// The provided slice must be `NUL`-terminated, does not contain any
109+
/// interior `NUL` bytes.
110+
pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> {
111+
if bytes.is_empty() {
112+
return Err(CStrConvertError::NotNulTerminated);
113+
}
114+
if bytes[bytes.len() - 1] != 0 {
115+
return Err(CStrConvertError::NotNulTerminated);
116+
}
117+
let mut i = 0;
118+
// `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking,
119+
// while it couldn't optimize away bounds checks for `i < bytes.len() - 1`.
120+
while i + 1 < bytes.len() {
121+
if bytes[i] == 0 {
122+
return Err(CStrConvertError::InteriorNul);
123+
}
124+
i += 1;
125+
}
126+
// SAFETY: We just checked that all properties hold.
127+
Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) })
128+
}
129+
130+
/// Creates a [`CStr`] from a `[u8]` without performing any additional
131+
/// checks.
132+
///
133+
/// # Safety
134+
///
135+
/// `bytes` *must* end with a `NUL` byte, and should only have a single
136+
/// `NUL` byte (or the string will be truncated).
137+
#[inline]
138+
pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr {
139+
// SAFETY: Properties of `bytes` guaranteed by the safety precondition.
140+
unsafe { core::mem::transmute(bytes) }
141+
}
142+
143+
/// Returns a C pointer to the string.
144+
#[inline]
145+
pub const fn as_char_ptr(&self) -> *const core::ffi::c_char {
146+
self.0.as_ptr() as _
147+
}
148+
149+
/// Convert the string to a byte slice without the trailing 0 byte.
150+
#[inline]
151+
pub fn as_bytes(&self) -> &[u8] {
152+
&self.0[..self.len()]
153+
}
154+
155+
/// Convert the string to a byte slice containing the trailing 0 byte.
156+
#[inline]
157+
pub const fn as_bytes_with_nul(&self) -> &[u8] {
158+
&self.0
159+
}
160+
161+
/// Yields a [`&str`] slice if the [`CStr`] contains valid UTF-8.
162+
///
163+
/// If the contents of the [`CStr`] are valid UTF-8 data, this
164+
/// function will return the corresponding [`&str`] slice. Otherwise,
165+
/// it will return an error with details of where UTF-8 validation failed.
166+
///
167+
/// # Examples
168+
///
169+
/// ```
170+
/// # use kernel::str::CStr;
171+
/// let cstr = CStr::from_bytes_with_nul(b"foo\0").unwrap();
172+
/// assert_eq!(cstr.to_str(), Ok("foo"));
173+
/// ```
174+
#[inline]
175+
pub fn to_str(&self) -> Result<&str, core::str::Utf8Error> {
176+
core::str::from_utf8(self.as_bytes())
177+
}
178+
179+
/// Unsafely convert this [`CStr`] into a [`&str`], without checking for
180+
/// valid UTF-8.
181+
///
182+
/// # Safety
183+
///
184+
/// The contents must be valid UTF-8.
185+
///
186+
/// # Examples
187+
///
188+
/// ```
189+
/// # use kernel::c_str;
190+
/// # use kernel::str::CStr;
191+
/// // SAFETY: String literals are guaranteed to be valid UTF-8
192+
/// // by the Rust compiler.
193+
/// let bar = c_str!("ツ");
194+
/// assert_eq!(unsafe { bar.as_str_unchecked() }, "ツ");
195+
/// ```
196+
#[inline]
197+
pub unsafe fn as_str_unchecked(&self) -> &str {
198+
unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
199+
}
200+
}
201+
33202
/// Allows formatting of [`fmt::Arguments`] into a raw buffer.
34203
///
35204
/// It does not fail if callers write past the end of the buffer so that they can calculate the

0 commit comments

Comments
 (0)