Skip to content

[EXPERIMENT] Disallow all literal suffixes except the standard numeric ones #103872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1729,9 +1729,9 @@ pub enum LitFloatType {
Unsuffixed,
}

/// Literal kind.
///
/// E.g., `"foo"`, `42`, `12.34`, or `true`.
/// Note that the entire literal (including the suffix) is considered when
/// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`.
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
pub enum LitKind {
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
Expand All @@ -1745,8 +1745,8 @@ pub enum LitKind {
Char(char),
/// An integer literal (`1`).
Int(u128, LitIntType),
/// A float literal (`1f64` or `1E10f64`). Stored as a symbol rather than
/// `f64` so that `LitKind` can impl `Eq` and `Hash`.
/// A float literal (`1.0`, `1f64` or `1E10f64`). Stored as a symbol rather
/// than `f64` so that `LitKind` can impl `Eq` and `Hash`.
Float(Symbol, LitFloatType),
/// A boolean literal.
Bool(bool),
Expand Down
22 changes: 6 additions & 16 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,16 @@ pub enum Delimiter {
Invisible,
}

/// Note that the entire literal (including the suffix) is considered when
/// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum LitKind {
Bool, // AST only, must never appear in a `Token`
Byte,
Char,
Integer,
Float,
Integer, // e.g. `1`, `1u8`
Float, // e.g. `1.`, `1.0`, `1f32`, `1e3f32`
Str,
StrRaw(u8), // raw string delimited by `n` hash symbols
ByteStr,
Expand All @@ -77,7 +80,7 @@ pub enum LitKind {
pub struct Lit {
pub kind: LitKind,
pub symbol: Symbol,
pub suffix: Option<Symbol>,
pub suffix: Option<Symbol>, // njn: change to a type?
}

impl fmt::Display for Lit {
Expand Down Expand Up @@ -120,19 +123,6 @@ impl LitKind {
}
}

/// Returns a short, human-readable name for this kind of literal token.
///
/// Raw and non-raw strings share the name "string"; raw and non-raw byte
/// strings share the name "byte string".
///
/// # Panics
///
/// Panics when called on `Bool`, which is AST-only and must never appear in
/// a token.
pub fn descr(self) -> &'static str {
    match self {
        Byte => "byte",
        Char => "char",
        Integer => "integer",
        Float => "float",
        Str | StrRaw(..) => "string",
        ByteStr | ByteStrRaw(..) => "byte string",
        Err => "error",
        // Kept last: this is the only arm that does not produce a name.
        Bool => panic!("literal token contains `Lit::Bool`"),
    }
}

pub(crate) fn may_have_suffix(self) -> bool {
matches!(self, Integer | Float | Err)
}
Expand Down
38 changes: 14 additions & 24 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,21 @@ use rustc_span::Span;

use std::ascii;

// njn: how much of this will be left?
/// Reasons a `token::Lit` can fail to convert into a semantic `LitKind`;
/// this is the error type returned by `LitKind::from_token_lit`.
pub enum LitError {
NotLiteral,
LexerError,
InvalidSuffix,
InvalidIntSuffix,
InvalidFloatSuffix,
// The payload is the numeric base of the offending literal (a base other
// than 10).
NonDecimalFloat(u32),
IntTooLarge,
}

impl LitKind {
/// Converts literal token into a semantic literal.
pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
let token::Lit { kind, symbol, suffix } = lit;
// njn: could even move the suffix into `kind`...
if suffix.is_some() && !kind.may_have_suffix() {
return Err(LitError::InvalidSuffix);
// njn: yuk
return Err(LitError::LexerError);
}

Ok(match kind {
Expand Down Expand Up @@ -259,33 +258,23 @@ fn strip_underscores(symbol: Symbol) -> Symbol {
symbol
}

fn filtered_float_lit(
symbol: Symbol,
suffix: Option<Symbol>,
base: u32,
) -> Result<LitKind, LitError> {
debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base);
if base != 10 {
return Err(LitError::NonDecimalFloat(base));
}
fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("float_lit: {:?}, {:?}", symbol, suffix);
let symbol = strip_underscores(symbol);

Ok(match suffix {
Some(suf) => LitKind::Float(
symbol,
ast::LitFloatType::Suffixed(match suf {
sym::f32 => ast::FloatTy::F32,
sym::f64 => ast::FloatTy::F64,
_ => return Err(LitError::InvalidFloatSuffix),
_ => return Err(LitError::LexerError),
}),
),
None => LitKind::Float(symbol, ast::LitFloatType::Unsuffixed),
})
}

fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("float_lit: {:?}, {:?}", symbol, suffix);
filtered_float_lit(strip_underscores(symbol), suffix, 10)
}

fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> {
debug!("integer_lit: {:?}, {:?}", symbol, suffix);
let symbol = strip_underscores(symbol);
Expand All @@ -312,10 +301,11 @@ fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitErr
sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32),
sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64),
sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128),
// `1f64` and `2f32` etc. are valid float literals, and
// `fxxx` looks more like an invalid float literal than invalid integer literal.
_ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base),
_ => return Err(LitError::InvalidIntSuffix),
_ =>
//return Err(LitError::LexerError), // njn: hmm
{
return Ok(ast::LitKind::Err);
}
},
_ => ast::LitIntType::Unsuffixed,
};
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_ast_lowering/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
let lit = if let ExprKind::Lit(lit) = &expr.kind {
lit.clone()
} else {
// njn: use Lit::from_token_lit here?
Lit {
token_lit: token::Lit::new(token::LitKind::Err, kw::Empty, None),
kind: LitKind::Err,
Expand Down
19 changes: 18 additions & 1 deletion compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,13 @@ pub enum DocStyle {
Inner,
}

// Note that the suffix is *not* considered when deciding the `LiteralKind` in
// this type. This means that float literals like `1f32` are classified by this
// type as `Int`. (Compare against `rustc_ast::token::LitKind` and
// `rustc_ast::ast::LitKind`.)
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
/// "12_u8", "0o100", "0b120i99"
/// "12_u8", "0o100", "0b120i99", "1f32".
Int { base: Base, empty_int: bool },
/// "12.34f32", "0b100.100"
Float { base: Base, empty_exponent: bool },
Expand All @@ -187,6 +191,19 @@ pub enum LiteralKind {
RawByteStr { n_hashes: Option<u8> },
}

impl LiteralKind {
pub fn descr(self) -> &'static str {
match self {
Int { .. } => "integer",
Float { .. } => "float",
Char { .. } => "char",
Byte { .. } => "byte",
Str { .. } | RawStr { .. } => "string",
ByteStr { .. } | RawByteStr { .. } => "byte string",
}
}
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum RawStrError {
/// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
Expand Down
Loading