Skip to content

Commit 29e08c0

Browse files
authored
Merge pull request #525 from dtolnay/literalvalue
`Literal::{str_value, cstr_value, byte_str_value}`
2 parents 39b016a + f9eec24 commit 29e08c0

File tree

4 files changed

+959
-0
lines changed

4 files changed

+959
-0
lines changed

src/lib.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,16 @@ mod imp;
164164
#[cfg(span_locations)]
165165
mod location;
166166

167+
#[cfg(procmacro2_semver_exempt)]
168+
mod num;
169+
#[cfg(procmacro2_semver_exempt)]
170+
#[allow(dead_code)]
171+
mod rustc_literal_escaper;
172+
167173
use crate::extra::DelimSpan;
168174
use crate::marker::{ProcMacroAutoTraits, MARKER};
175+
#[cfg(procmacro2_semver_exempt)]
176+
use crate::rustc_literal_escaper::MixedUnit;
169177
use core::cmp::Ordering;
170178
use core::fmt::{self, Debug, Display};
171179
use core::hash::{Hash, Hasher};
@@ -182,6 +190,10 @@ use std::path::PathBuf;
182190
#[cfg_attr(docsrs, doc(cfg(feature = "span-locations")))]
183191
pub use crate::location::LineColumn;
184192

193+
#[cfg(procmacro2_semver_exempt)]
194+
#[cfg_attr(docsrs, doc(cfg(procmacro2_semver_exempt)))]
195+
pub use crate::rustc_literal_escaper::EscapeError;
196+
185197
/// An abstract stream of tokens, or more concretely a sequence of token trees.
186198
///
187199
/// This type provides interfaces for iterating over token trees and for
@@ -1263,6 +1275,112 @@ impl Literal {
12631275
self.inner.subspan(range).map(Span::_new)
12641276
}
12651277

1278+
/// Returns the unescaped string value if this is a string literal.
///
/// The literal's textual representation is inspected and two forms are
/// accepted:
///
/// - A cooked string, `"..."`: the text between the quotes is passed to
///   `rustc_literal_escaper::unescape_str` and the characters it yields are
///   collected into the result.
/// - A raw string, `r"..."` or `r#"..."#`: the text between the delimiters
///   is returned verbatim.
///
/// # Errors
///
/// Returns `ConversionErrorKind::FailedToUnescape` if the unescaper reports
/// an error for which `is_fatal()` is true, and
/// `ConversionErrorKind::InvalidLiteralKind` if the representation matches
/// neither of the forms above.
#[cfg(procmacro2_semver_exempt)]
pub fn str_value(&self) -> Result<String, ConversionErrorKind> {
    let repr = self.to_string();

    // Slicing from byte 1 is safe: the first character is the one-byte `"`.
    // Checking `repr[1..]` (rather than `repr`) rejects a lone `"`.
    if repr.starts_with('"') && repr[1..].ends_with('"') {
        let quoted = &repr[1..repr.len() - 1];
        let mut value = String::with_capacity(quoted.len());
        let mut error = None;
        rustc_literal_escaper::unescape_str(quoted, |_range, res| match res {
            Ok(ch) => value.push(ch),
            Err(err) => {
                // Only fatal errors are recorded; a later fatal error
                // overwrites an earlier one.
                if err.is_fatal() {
                    error = Some(ConversionErrorKind::FailedToUnescape(err));
                }
            }
        });
        return match error {
            Some(error) => Err(error),
            None => Ok(value),
        };
    }

    if repr.starts_with('r') {
        // Raw strings have no escapes: the delimited text is the value.
        if let Some(raw) = get_raw(&repr[1..]) {
            return Ok(raw.to_owned());
        }
    }

    Err(ConversionErrorKind::InvalidLiteralKind)
}
1309+
1310+
/// Returns the unescaped string value (including nul terminator) if this is
/// a c-string literal.
///
/// The literal's textual representation is inspected and two forms are
/// accepted:
///
/// - A cooked C string, `c"..."`: the text between the quotes is passed to
///   `rustc_literal_escaper::unescape_c_str`. Each `MixedUnit::Char` is
///   appended as its UTF-8 encoding and each `MixedUnit::HighByte` is
///   appended as a single byte.
/// - A raw C string, `cr"..."` or `cr#"..."#`: the bytes between the
///   delimiters are copied verbatim.
///
/// In both cases a trailing `0` byte is appended to the returned vector.
///
/// # Errors
///
/// Returns `ConversionErrorKind::FailedToUnescape` if the unescaper reports
/// an error for which `is_fatal()` is true, and
/// `ConversionErrorKind::InvalidLiteralKind` if the representation matches
/// neither of the forms above.
#[cfg(procmacro2_semver_exempt)]
pub fn cstr_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
    let repr = self.to_string();

    // Checking `repr[2..]` (rather than `repr`) rejects the bare prefix `c"`.
    if repr.starts_with("c\"") && repr[2..].ends_with('"') {
        let quoted = &repr[2..repr.len() - 1];
        let mut value = Vec::with_capacity(quoted.len());
        let mut error = None;
        rustc_literal_escaper::unescape_c_str(quoted, |_range, res| match res {
            Ok(MixedUnit::Char(ch)) => {
                // Four bytes is the maximum length of a UTF-8 encoded char.
                value.extend_from_slice(ch.get().encode_utf8(&mut [0; 4]).as_bytes());
            }
            Ok(MixedUnit::HighByte(byte)) => value.push(byte.get()),
            Err(err) => {
                // Only fatal errors are recorded; a later fatal error
                // overwrites an earlier one.
                if err.is_fatal() {
                    error = Some(ConversionErrorKind::FailedToUnescape(err));
                }
            }
        });
        return match error {
            Some(error) => Err(error),
            None => {
                value.push(b'\0');
                Ok(value)
            }
        };
    }

    if repr.starts_with("cr") {
        if let Some(raw) = get_raw(&repr[2..]) {
            // Reserve one extra byte for the nul terminator.
            let mut value = Vec::with_capacity(raw.len() + 1);
            value.extend_from_slice(raw.as_bytes());
            value.push(b'\0');
            return Ok(value);
        }
    }

    Err(ConversionErrorKind::InvalidLiteralKind)
}
1351+
1352+
/// Returns the unescaped string value if this is a byte string literal.
///
/// The literal's textual representation is inspected and two forms are
/// accepted:
///
/// - A cooked byte string, `b"..."`: the text between the quotes is passed
///   to `rustc_literal_escaper::unescape_byte_str` and the bytes it yields
///   are collected into the result.
/// - A raw byte string, `br"..."` or `br#"..."#`: the bytes between the
///   delimiters are returned verbatim.
///
/// # Errors
///
/// Returns `ConversionErrorKind::FailedToUnescape` if the unescaper reports
/// an error for which `is_fatal()` is true, and
/// `ConversionErrorKind::InvalidLiteralKind` if the representation matches
/// neither of the forms above.
#[cfg(procmacro2_semver_exempt)]
pub fn byte_str_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
    let repr = self.to_string();

    // Checking `repr[2..]` (rather than `repr`) rejects the bare prefix `b"`.
    if repr.starts_with("b\"") && repr[2..].ends_with('"') {
        let quoted = &repr[2..repr.len() - 1];
        let mut value = Vec::with_capacity(quoted.len());
        let mut error = None;
        rustc_literal_escaper::unescape_byte_str(quoted, |_range, res| match res {
            Ok(byte) => value.push(byte),
            Err(err) => {
                // Only fatal errors are recorded; a later fatal error
                // overwrites an earlier one.
                if err.is_fatal() {
                    error = Some(ConversionErrorKind::FailedToUnescape(err));
                }
            }
        });
        return match error {
            Some(error) => Err(error),
            None => Ok(value),
        };
    }

    if repr.starts_with("br") {
        // Raw byte strings have no escapes: the delimited bytes are the value.
        if let Some(raw) = get_raw(&repr[2..]) {
            return Ok(raw.as_bytes().to_owned());
        }
    }

    Err(ConversionErrorKind::InvalidLiteralKind)
}
1383+
12661384
// Intended for the `quote!` macro to use when constructing a proc-macro2
12671385
// token out of a macro_rules $:literal token, which is already known to be
12681386
// a valid literal. This avoids reparsing/validating the literal's string
@@ -1299,6 +1417,33 @@ impl Display for Literal {
12991417
}
13001418
}
13011419

1420+
/// Error when retrieving a string literal's unescaped value.
///
/// Returned by `Literal::str_value`, `Literal::cstr_value` and
/// `Literal::byte_str_value`.
#[cfg(procmacro2_semver_exempt)]
#[derive(Debug, PartialEq, Eq)]
pub enum ConversionErrorKind {
    /// The literal is of the right string kind, but its contents are malformed
    /// in a way that cannot be unescaped to a value.
    FailedToUnescape(EscapeError),
    /// The literal is not of the string kind whose value was requested, for
    /// example byte string vs UTF-8 string.
    InvalidLiteralKind,
}
1431+
1432+
// ###"..."### -> ...
1433+
#[cfg(procmacro2_semver_exempt)]
1434+
/// Extracts the contents of a raw string body with its `r`/`br`/`cr` prefix
/// already removed.
///
/// `repr` is expected to look like `"..."`, `#"..."#`, `##"..."##`, and so
/// on. Returns the text strictly between the opening and closing quote, or
/// `None` if `repr` is not a quote-delimited body surrounded by the same
/// number of `#` characters on both sides.
fn get_raw(repr: &str) -> Option<&str> {
    // Number of leading `#` characters.
    let pounds = repr.len() - repr.trim_start_matches('#').len();
    // The length check comes first: it guarantees room for both pound runs
    // and both quotes, which keeps every slice below in bounds.
    if repr.len() >= pounds + 1 + 1 + pounds
        && repr[pounds..].starts_with('"')
        // The trailing run of `#` must be exactly as long as the leading one.
        && repr.trim_end_matches('#').len() + pounds == repr.len()
        && repr[..repr.len() - pounds].ends_with('"')
    {
        Some(&repr[pounds + 1..repr.len() - pounds - 1])
    } else {
        None
    }
}
1446+
13021447
/// Public implementation details for the `TokenStream` type, such as iterators.
13031448
pub mod token_stream {
13041449
use crate::marker::{ProcMacroAutoTraits, MARKER};

src/num.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// TODO: use NonZero<char> in Rust 1.89+
2+
/// A `char` that is not the nul character (`'\0'`) when constructed through
/// [`NonZeroChar::new`].
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct NonZeroChar(char);

impl NonZeroChar {
    /// Wraps `ch`, returning `None` if it is the nul character.
    pub fn new(ch: char) -> Option<Self> {
        if ch == '\0' {
            None
        } else {
            Some(NonZeroChar(ch))
        }
    }

    /// Returns the wrapped character.
    pub fn get(self) -> char {
        self.0
    }
}

0 commit comments

Comments
 (0)