From d188cfd654f3b7159a7bd08234111069056c2769 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Thu, 9 Apr 2020 11:01:17 +0200 Subject: [PATCH] feat: adds support for literal number suffixes --- crates/mun_codegen/src/ir/body.rs | 57 +- crates/mun_codegen/src/ir/ty.rs | 27 +- .../test__literal_types_file_ir.snap | 55 ++ .../test__literal_types_group_ir.snap | 7 + crates/mun_codegen/src/test.rs | 30 + crates/mun_hir/src/builtin_type.rs | 3 + crates/mun_hir/src/expr.rs | 530 +++++++++++++++++- crates/mun_hir/src/ty/infer.rs | 26 +- .../tests__infer_suffix_literals.snap | 27 + crates/mun_hir/src/ty/tests.rs | 30 + crates/mun_syntax/src/ast/expr_extensions.rs | 156 +++++- .../mun_syntax/src/parsing/lexer/numbers.rs | 12 +- crates/mun_syntax/src/tests/lexer.rs | 4 +- .../src/tests/snapshots/lexer__numbers.snap | 6 +- 14 files changed, 929 insertions(+), 41 deletions(-) create mode 100644 crates/mun_codegen/src/snapshots/test__literal_types_file_ir.snap create mode 100644 crates/mun_codegen/src/snapshots/test__literal_types_group_ir.snap create mode 100644 crates/mun_hir/src/ty/snapshots/tests__infer_suffix_literals.snap diff --git a/crates/mun_codegen/src/ir/body.rs b/crates/mun_codegen/src/ir/body.rs index 0c90bfaa7..8c2dcee8f 100644 --- a/crates/mun_codegen/src/ir/body.rs +++ b/crates/mun_codegen/src/ir/body.rs @@ -12,8 +12,9 @@ use inkwell::{ values::{BasicValueEnum, CallSiteValue, FloatValue, FunctionValue, IntValue, StructValue}, AddressSpace, FloatPredicate, IntPredicate, }; -use std::{collections::HashMap, mem, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; +use crate::ir::ty::ResolveBitness; use inkwell::basic_block::BasicBlock; use inkwell::values::{AggregateValueEnum, GlobalValue, PointerValue}; @@ -200,7 +201,7 @@ impl<'a, 'b, D: IrDatabase> BodyIrGenerator<'a, 'b, D> { let resolver = hir::resolver_for_expr(self.body.clone(), self.db, expr); Some(self.gen_path_expr(p, expr, &resolver)) } - Expr::Literal(lit) => Some(self.gen_literal(lit)), + 
Expr::Literal(lit) => Some(self.gen_literal(lit, expr)), Expr::RecordLit { fields, .. } => Some(self.gen_record_lit(expr, fields)), Expr::BinaryOp { lhs, rhs, op } => { self.gen_binary_op(expr, *lhs, *rhs, op.expect("missing op")) @@ -246,16 +247,50 @@ impl<'a, 'b, D: IrDatabase> BodyIrGenerator<'a, 'b, D> { } /// Generates an IR value that represents the given `Literal`. - fn gen_literal(&mut self, lit: &Literal) -> BasicValueEnum { + fn gen_literal(&mut self, lit: &Literal, expr: ExprId) -> BasicValueEnum { match lit { - Literal::Int(v) => self - .db - .context() - .i64_type() - .const_int(unsafe { mem::transmute::(*v) }, true) - .into(), - - Literal::Float(v) => self.db.context().f64_type().const_float(*v as f64).into(), + Literal::Int(v) => { + let ty = match &self.infer[expr] { + hir::Ty::Apply(hir::ApplicationTy { + ctor: hir::TypeCtor::Int(int_ty), + .. + }) => int_ty, + _ => unreachable!( + "cannot construct an IR value for anything but an integral type" + ), + }; + + let context = self.db.context(); + let has_sign = ty.signedness == hir::Signedness::Signed; + let ir_ty = match ty.resolve(&self.db.target_data()).bitness { + hir::IntBitness::X8 => context.i8_type().const_int(v.value as u64, has_sign), + hir::IntBitness::X16 => context.i16_type().const_int(v.value as u64, has_sign), + hir::IntBitness::X32 => context.i32_type().const_int(v.value as u64, has_sign), + hir::IntBitness::X64 => context.i64_type().const_int(v.value as u64, has_sign), + _ => unreachable!("unresolved bitness in code generation"), + }; + + ir_ty.into() + } + + Literal::Float(v) => { + let ty = match &self.infer[expr] { + hir::Ty::Apply(hir::ApplicationTy { + ctor: hir::TypeCtor::Float(float_ty), + .. 
+ }) => float_ty, + _ => unreachable!("cannot construct an IR value for anything but a float type"), + }; + + let context = self.db.context(); + let ir_ty = match ty.resolve(&self.db.target_data()).bitness { + hir::FloatBitness::X32 => context.f32_type().const_float(v.value), + hir::FloatBitness::X64 => context.f64_type().const_float(v.value), + _ => unreachable!("unresolved bitness in code generation"), + }; + + ir_ty.into() + } Literal::Bool(value) => { let ty = self.db.context().bool_type(); diff --git a/crates/mun_codegen/src/ir/ty.rs b/crates/mun_codegen/src/ir/ty.rs index cc561de12..3deb8325f 100644 --- a/crates/mun_codegen/src/ir/ty.rs +++ b/crates/mun_codegen/src/ir/ty.rs @@ -5,9 +5,9 @@ use crate::{ CodeGenParams, IrDatabase, }; use hir::{ApplicationTy, CallableDef, FloatBitness, FloatTy, IntBitness, IntTy, Ty, TypeCtor}; +use inkwell::targets::TargetData; use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, FloatType, IntType, StructType}; use inkwell::AddressSpace; -use mun_target::spec::Target; /// Given a mun type, construct an LLVM IR type #[rustfmt::skip] @@ -57,7 +57,7 @@ pub(crate) fn ir_query(db: &impl IrDatabase, ty: Ty, params: CodeGenParams) -> A /// Returns the LLVM IR type of the specified float type fn float_ty_query(db: &impl IrDatabase, fty: FloatTy) -> FloatType { let context = db.context(); - match fty.resolve(&db.target()).bitness { + match fty.resolve(&db.target_data()).bitness { FloatBitness::X64 => context.f64_type(), FloatBitness::X32 => context.f32_type(), _ => unreachable!(), @@ -67,7 +67,7 @@ fn float_ty_query(db: &impl IrDatabase, fty: FloatTy) -> FloatType { /// Returns the LLVM IR type of the specified int type fn int_ty_query(db: &impl IrDatabase, ity: IntTy) -> IntType { let context = db.context(); - match ity.resolve(&db.target()).bitness { + match ity.resolve(&db.target_data()).bitness { IntBitness::X64 => context.i64_type(), IntBitness::X32 => context.i32_type(), IntBitness::X16 => context.i16_type(), @@ -101,7 
+101,7 @@ pub fn type_info_query(db: &impl IrDatabase, ty: Ty) -> TypeInfo { let ir_ty = float_ty_query(db, ty); let type_size = TypeSize::from_ir_type(&ir_ty, target.as_ref()); TypeInfo::new( - format!("core::{}", ty.resolve(&db.target())), + format!("core::{}", ty.resolve(&db.target_data())), TypeGroup::FundamentalTypes, type_size, ) @@ -110,7 +110,7 @@ pub fn type_info_query(db: &impl IrDatabase, ty: Ty) -> TypeInfo { let ir_ty = int_ty_query(db, ty); let type_size = TypeSize::from_ir_type(&ir_ty, target.as_ref()); TypeInfo::new( - format!("core::{}", ty.resolve(&db.target())), + format!("core::{}", ty.resolve(&db.target_data())), TypeGroup::FundamentalTypes, type_size, ) @@ -131,12 +131,12 @@ pub fn type_info_query(db: &impl IrDatabase, ty: Ty) -> TypeInfo { } } -trait ResolveBitness { - fn resolve(&self, _target: &Target) -> Self; +pub(crate) trait ResolveBitness { + fn resolve(&self, target: &TargetData) -> Self; } impl ResolveBitness for FloatTy { - fn resolve(&self, _target: &Target) -> Self { + fn resolve(&self, _target: &TargetData) -> Self { let bitness = match self.bitness { FloatBitness::Undefined => FloatBitness::X64, bitness => bitness, @@ -146,10 +146,17 @@ impl ResolveBitness for FloatTy { } impl ResolveBitness for IntTy { - fn resolve(&self, _target: &Target) -> Self { + fn resolve(&self, target: &TargetData) -> Self { + let ptr_bit_size = target.ptr_sized_int_type(None).get_bit_width(); + let bitness = match ptr_bit_size { + 16 => IntBitness::X16, + 32 => IntBitness::X32, + 64 => IntBitness::X64, + _ => unreachable!("unsupported bit size for pointers"), + }; let bitness = match self.bitness { IntBitness::Undefined => IntBitness::X64, - IntBitness::Xsize => IntBitness::X64, + IntBitness::Xsize => bitness, bitness => bitness, }; IntTy { diff --git a/crates/mun_codegen/src/snapshots/test__literal_types_file_ir.snap b/crates/mun_codegen/src/snapshots/test__literal_types_file_ir.snap new file mode 100644 index 000000000..27ee262cd --- /dev/null +++ 
b/crates/mun_codegen/src/snapshots/test__literal_types_file_ir.snap @@ -0,0 +1,55 @@ +--- +source: crates/mun_codegen/src/test.rs +expression: "fn main(){\n let a = 123;\n let a = 123u8;\n let a = 123u16;\n let a = 123u32;\n let a = 123u64;\n let a = 123uint;\n let a = 1_000_000_u32;\n let a = 123i8;\n let a = 123i16;\n let a = 123i32;\n let a = 123i64;\n let a = 123int;\n let a = 1_000_000_i32;\n let a = 1_000_123.0e-2;\n let a = 1_000_123.0e-2f32;\n let a = 1_000_123.0e-2f64;\n let a = 1_000_123.0e-2float;\n}\n\nfn add(a:u32): u32 {\n a + 12u32\n}" +--- +; ModuleID = 'main.mun' +source_filename = "main.mun" + +define void @main() { +body: + %a16 = alloca double + %a15 = alloca double + %a14 = alloca float + %a13 = alloca double + %a12 = alloca i32 + %a11 = alloca i64 + %a10 = alloca i64 + %a9 = alloca i32 + %a8 = alloca i16 + %a7 = alloca i8 + %a6 = alloca i32 + %a5 = alloca i64 + %a4 = alloca i64 + %a3 = alloca i32 + %a2 = alloca i16 + %a1 = alloca i8 + %a = alloca i64 + store i64 123, i64* %a + store i8 123, i8* %a1 + store i16 123, i16* %a2 + store i32 123, i32* %a3 + store i64 123, i64* %a4 + store i64 123, i64* %a5 + store i32 1000000, i32* %a6 + store i8 123, i8* %a7 + store i16 123, i16* %a8 + store i32 123, i32* %a9 + store i64 123, i64* %a10 + store i64 123, i64* %a11 + store i32 1000000, i32* %a12 + store double 0x40C3889D70A3D70A, double* %a13 + store float 0x40C3889D80000000, float* %a14 + store double 0x40C3889D70A3D70A, double* %a15 + store double 0x40C3889D70A3D70A, double* %a16 + ret void +} + +define i32 @add(i32) { +body: + %a = alloca i32 + store i32 %0, i32* %a + %a1 = load i32, i32* %a + %add = add i32 %a1, 12 + ret i32 %add +} + diff --git a/crates/mun_codegen/src/snapshots/test__literal_types_group_ir.snap b/crates/mun_codegen/src/snapshots/test__literal_types_group_ir.snap new file mode 100644 index 000000000..5fa2fa04f --- /dev/null +++ b/crates/mun_codegen/src/snapshots/test__literal_types_group_ir.snap @@ -0,0 +1,7 @@ +--- +source: 
crates/mun_codegen/src/test.rs +expression: "fn main(){\n 123;\n 123u8;\n 123u16;\n 123u32;\n 123u64;\n 123uint;\n 1_000_000_u32;\n 123i8;\n 123i16;\n 123i32;\n 123i64;\n 123int;\n 1_000_000_i32;\n 1_000_123.0e-2;\n 1_000_123.0e-2f32;\n 1_000_123.0e-2f64;\n 1_000_123.0e-2float;\n}\n\nfn add(a:u32): u32 {\n a + 12u32\n}" +--- +; ModuleID = 'group_name' +source_filename = "group_name" + diff --git a/crates/mun_codegen/src/test.rs b/crates/mun_codegen/src/test.rs index 08a9e24c1..dbf589997 100644 --- a/crates/mun_codegen/src/test.rs +++ b/crates/mun_codegen/src/test.rs @@ -5,6 +5,36 @@ use mun_target::spec::Target; use std::cell::RefCell; use std::sync::Arc; +#[test] +fn literal_types() { + test_snapshot_unoptimized( + r" + fn main(){ + let a = 123; + let a = 123u8; + let a = 123u16; + let a = 123u32; + let a = 123u64; + let a = 123uint; + let a = 1_000_000_u32; + let a = 123i8; + let a = 123i16; + let a = 123i32; + let a = 123i64; + let a = 123int; + let a = 1_000_000_i32; + let a = 1_000_123.0e-2; + let a = 1_000_123.0e-2f32; + let a = 1_000_123.0e-2f64; + let a = 1_000_123.0e-2float; + } + + fn add(a:u32) -> u32 { + a + 12u32 + }", + ) +} + #[test] fn function() { test_snapshot( diff --git a/crates/mun_hir/src/builtin_type.rs b/crates/mun_hir/src/builtin_type.rs index 0726d15e3..3900421d4 100644 --- a/crates/mun_hir/src/builtin_type.rs +++ b/crates/mun_hir/src/builtin_type.rs @@ -118,12 +118,14 @@ impl BuiltinInt { pub fn from_suffix(suffix: &str) -> Option { let res = match suffix { + "int" => Self::INT, "isize" => Self::ISIZE, "i8" => Self::I8, "i16" => Self::I16, "i32" => Self::I32, "i64" => Self::I64, + "uint" => Self::UINT, "usize" => Self::USIZE, "u8" => Self::U8, "u16" => Self::U16, @@ -146,6 +148,7 @@ impl BuiltinFloat { let res = match suffix { "f32" => BuiltinFloat::F32, "f64" => BuiltinFloat::F64, + "float" => BuiltinFloat::FLOAT, _ => return None, }; Some(res) diff --git a/crates/mun_hir/src/expr.rs b/crates/mun_hir/src/expr.rs index 
/// Errors that can be detected while converting the text of a numeric literal into a
/// [`Literal`] value.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum LiteralError {
    /// The integer cannot be parsed because it's too large to be stored in the literal's value
    /// (which is a `u128`).
    IntTooLarge,

    /// A lexer error occurred. This might happen if the literal is malformed (e.g. 0b01012)
    LexerError,

    /// Encountered an unknown suffix on an integer literal. Holds the offending suffix text.
    InvalidIntSuffix(SmolStr),

    /// Encountered an unknown suffix on a floating point literal. Holds the offending suffix
    /// text.
    InvalidFloatSuffix(SmolStr),

    /// Trying to add a floating point suffix to a literal that is not written in base 10.
    /// Holds the base of the literal.
    NonDecimalFloat(u32),
}

/// An integer literal: its parsed value plus whether a type suffix was written.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct LiteralInt {
    /// Whether (and with which builtin type) the literal was suffixed.
    pub kind: LiteralIntKind,
    /// The numeric value of the literal.
    pub value: u128,
}

/// Describes the type suffix of an integer literal.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum LiteralIntKind {
    /// The literal carries an explicit builtin integer type suffix (e.g. `123u8`).
    Suffixed(BuiltinInt),
    /// The literal carries no (valid) suffix.
    Unsuffixed,
}

/// A floating point literal: its parsed value plus whether a type suffix was written.
// `Eq` is not derived here because `f64` does not implement it.
#[derive(Debug, Clone, PartialEq)]
pub struct LiteralFloat {
    /// Whether (and with which builtin type) the literal was suffixed.
    pub kind: LiteralFloatKind,
    /// The numeric value of the literal.
    pub value: f64,
}

/// Describes the type suffix of a floating point literal.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum LiteralFloatKind {
    /// The literal carries an explicit builtin float type suffix (e.g. `1.0f32`).
    Suffixed(BuiltinFloat),
    /// The literal carries no (valid) suffix.
    Unsuffixed,
}
/// Returns `s` without any `_` characters.
///
/// The input is borrowed as-is when it contains no underscore, so the common case does not
/// allocate; otherwise a new `String` holding the remaining characters is returned.
fn strip_underscores(s: &str) -> Cow<str> {
    if !s.contains('_') {
        return Cow::Borrowed(s);
    }

    let without_separators: String = s.chars().filter(|&ch| ch != '_').collect();
    Cow::Owned(without_separators)
}
=> LiteralFloatKind::Suffixed(suf), + None => { + errors.push(LiteralError::InvalidFloatSuffix(SmolStr::new(suf))); + LiteralFloatKind::Unsuffixed + } + }, + None => LiteralFloatKind::Unsuffixed, + }; + + let value = if base == 10 { + f64::from_str(str).expect("could not parse floating point number, this is definitely a bug") + } else { + 0.0 + }; + (Literal::Float(LiteralFloat { kind, value }), errors) +} + +/// Parses the given string into an integer literal +fn integer_lit(str: &str, suffix: Option<&str>) -> (Literal, Vec) { + let str = strip_underscores(str); + + let base = match str.as_bytes() { + [b'0', b'x', ..] => 16, + [b'0', b'o', ..] => 8, + [b'0', b'b', ..] => 2, + _ => 10, + }; + + let mut errors = Vec::new(); + + let kind = match suffix { + Some(suf) => match BuiltinInt::from_suffix(suf) { + Some(ty) => LiteralIntKind::Suffixed(ty), + None => { + // 1f32 is a valid number, but its an integer disguised as a float + if BuiltinFloat::from_suffix(suf).is_some() { + return filtered_float_lit(&str, suffix, base); + } + + errors.push(LiteralError::InvalidIntSuffix(SmolStr::new(suf))); + LiteralIntKind::Unsuffixed + } + }, + _ => LiteralIntKind::Unsuffixed, + }; + + let str = &str[if base != 10 { 2 } else { 0 }..]; + let (value, err) = match u128::from_str_radix(str, base) { + Ok(i) => (i, None), + Err(_) => { + // Small bases are lexed as if they were base 10, e.g. the string might be + // `0b10201`. This will cause the conversion above to fail. 
+ let from_lexer = base < 10 + && str + .chars() + .any(|c| c.to_digit(10).map_or(false, |d| d >= base)); + if from_lexer { + (0, Some(LiteralError::LexerError)) + } else { + (0, Some(LiteralError::IntTooLarge)) + } + } + }; + + if let Some(err) = err { + errors.push(err); + } + + (Literal::Int(LiteralInt { kind, value }), errors) +} + +#[cfg(test)] +mod test { + use crate::builtin_type::{BuiltinFloat, BuiltinInt}; + use crate::expr::{float_lit, LiteralError, LiteralFloat, LiteralFloatKind}; + use crate::expr::{integer_lit, LiteralInt, LiteralIntKind}; + use crate::Literal; + use mun_syntax::SmolStr; + + #[test] + fn test_integer_literals() { + assert_eq!( + integer_lit("12", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 12 + }), + vec![] + ) + ); + assert_eq!( + integer_lit("0xF00BA", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 0xF00BA + }), + vec![] + ) + ); + assert_eq!( + integer_lit("10_000_000", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 10_000_000 + }), + vec![] + ) + ); + assert_eq!( + integer_lit("0o765431", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 0o765431 + }), + vec![] + ) + ); + assert_eq!( + integer_lit("0b01011100", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 0b01011100 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("0b02011100", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 0 + }), + vec![LiteralError::LexerError] + ) + ); + assert_eq!( + integer_lit("0o09", None), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 0 + }), + vec![LiteralError::LexerError] + ) + ); + + assert_eq!( + integer_lit("1234", Some("foo")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Unsuffixed, + value: 1234 + }), + vec![LiteralError::InvalidIntSuffix(SmolStr::new("foo"))] + ) + ); + + 
assert_eq!( + integer_lit("123", Some("i8")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::I8), + value: 123 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("i16")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::I16), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("i32")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::I32), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("i64")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::I64), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("isize")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::ISIZE), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("int")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::INT), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("123", Some("u8")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::U8), + value: 123 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("u16")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::U16), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("u32")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::U32), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("u64")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::U64), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("usize")), + ( + Literal::Int(LiteralInt { + kind: LiteralIntKind::Suffixed(BuiltinInt::USIZE), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1234", Some("uint")), + ( + Literal::Int(LiteralInt { + kind: 
LiteralIntKind::Suffixed(BuiltinInt::UINT), + value: 1234 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("1", Some("f32")), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Suffixed(BuiltinFloat::F32), + value: 1.0 + }), + vec![] + ) + ); + + assert_eq!( + integer_lit("0x1", Some("f32")), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Suffixed(BuiltinFloat::F32), + value: 0.0 + }), + vec![LiteralError::NonDecimalFloat(16)] + ) + ); + } + + #[test] + fn test_float_literals() { + assert_eq!( + float_lit("1234.1234", None), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Unsuffixed, + value: 1234.1234 + }), + vec![] + ) + ); + + assert_eq!( + float_lit("1_234.1_234", None), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Unsuffixed, + value: 1234.1234 + }), + vec![] + ) + ); + + assert_eq!( + float_lit("1234.1234e2", None), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Unsuffixed, + value: 123412.34 + }), + vec![] + ) + ); + + assert_eq!( + float_lit("1234.1234e2", Some("foo")), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Unsuffixed, + value: 123412.34 + }), + vec![LiteralError::InvalidFloatSuffix(SmolStr::new("foo"))] + ) + ); + + assert_eq!( + float_lit("1234.1234e2", Some("f32")), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Suffixed(BuiltinFloat::F32), + value: 123412.34 + }), + vec![] + ) + ); + + assert_eq!( + float_lit("1234.1234e2", Some("f64")), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Suffixed(BuiltinFloat::F64), + value: 123412.34 + }), + vec![] + ) + ); + + assert_eq!( + float_lit("1234.1234e2", Some("float")), + ( + Literal::Float(LiteralFloat { + kind: LiteralFloatKind::Suffixed(BuiltinFloat::FLOAT), + value: 123412.34 + }), + vec![] + ) + ); + } +} diff --git a/crates/mun_hir/src/ty/infer.rs b/crates/mun_hir/src/ty/infer.rs index 02fae2e0f..4350af947 100644 --- a/crates/mun_hir/src/ty/infer.rs +++ 
b/crates/mun_hir/src/ty/infer.rs @@ -22,6 +22,7 @@ use std::sync::Arc; mod place_expr; mod type_variable; +use crate::expr::{LiteralFloatKind, LiteralIntKind}; use crate::ty::primitives::{FloatTy, IntTy}; pub use type_variable::TypeVarId; @@ -322,8 +323,29 @@ impl<'a, D: HirDatabase> InferenceResultBuilder<'a, D> { Expr::Literal(lit) => match lit { Literal::String(_) => Ty::Unknown, Literal::Bool(_) => Ty::simple(TypeCtor::Bool), - Literal::Int(_) => Ty::simple(TypeCtor::Int(IntTy::int())), - Literal::Float(_) => Ty::simple(TypeCtor::Float(FloatTy::float())), + Literal::Int(ty) => { + // TODO: Add inferencing support + let ty = if let LiteralIntKind::Suffixed(suffix) = ty.kind { + IntTy { + bitness: suffix.bitness, + signedness: suffix.signedness, + } + } else { + IntTy::int() + }; + Ty::simple(TypeCtor::Int(ty)) + } + Literal::Float(ty) => { + // TODO: Add inferencing support + let ty = if let LiteralFloatKind::Suffixed(suffix) = ty.kind { + FloatTy { + bitness: suffix.bitness, + } + } else { + FloatTy::float() + }; + Ty::simple(TypeCtor::Float(ty)) + } }, Expr::Return { expr } => { if let Some(expr) = expr { diff --git a/crates/mun_hir/src/ty/snapshots/tests__infer_suffix_literals.snap b/crates/mun_hir/src/ty/snapshots/tests__infer_suffix_literals.snap new file mode 100644 index 000000000..c5bdb460b --- /dev/null +++ b/crates/mun_hir/src/ty/snapshots/tests__infer_suffix_literals.snap @@ -0,0 +1,27 @@ +--- +source: crates/mun_hir/src/ty/tests.rs +expression: "fn main(){\n 123;\n 123u8;\n 123u16;\n 123u32;\n 123u64;\n 123uint;\n 1_000_000_u32;\n 123i8;\n 123i16;\n 123i32;\n 123i64;\n 123int;\n 1_000_000_i32;\n 1_000_123.0e-2;\n 1_000_123.0e-2f32;\n 1_000_123.0e-2f64;\n 1_000_123.0e-2float;\n}\n\nfn add(a:u32) -> u32 {\n a + 12u32\n}" +--- +[9; 269) '{ ...oat; }': nothing +[15; 18) '123': int +[24; 29) '123u8': u8 +[35; 41) '123u16': u16 +[47; 53) '123u32': u32 +[59; 65) '123u64': u64 +[71; 78) '123uint': uint +[84; 97) '1_000_000_u32': u32 +[103; 108) '123i8': i8 
+[114; 120) '123i16': i16 +[126; 132) '123i32': i32 +[138; 144) '123i64': i64 +[150; 156) '123int': int +[162; 175) '1_000_000_i32': i32 +[181; 195) '1_000_123.0e-2': float +[201; 218) '1_000_...e-2f32': f32 +[224; 241) '1_000_...e-2f64': f64 +[247; 266) '1_000_...2float': float +[278; 279) 'a': u32 +[292; 309) '{ ...2u32 }': u32 +[298; 299) 'a': u32 +[298; 307) 'a + 12u32': u32 +[302; 307) '12u32': u32 diff --git a/crates/mun_hir/src/ty/tests.rs b/crates/mun_hir/src/ty/tests.rs index d4a61cbe4..a100bce38 100644 --- a/crates/mun_hir/src/ty/tests.rs +++ b/crates/mun_hir/src/ty/tests.rs @@ -8,6 +8,36 @@ use mun_syntax::{ast, AstNode}; use std::fmt::Write; use std::sync::Arc; +#[test] +fn infer_suffix_literals() { + infer_snapshot( + r" + fn main(){ + 123; + 123u8; + 123u16; + 123u32; + 123u64; + 123uint; + 1_000_000_u32; + 123i8; + 123i16; + 123i32; + 123i64; + 123int; + 1_000_000_i32; + 1_000_123.0e-2; + 1_000_123.0e-2f32; + 1_000_123.0e-2f64; + 1_000_123.0e-2float; + } + + fn add(a:u32) -> u32 { + a + 12u32 + }", + ) +} + #[test] fn infer_invalid_struct_type() { infer_snapshot( diff --git a/crates/mun_syntax/src/ast/expr_extensions.rs b/crates/mun_syntax/src/ast/expr_extensions.rs index 95e063eca..0f607b0c5 100644 --- a/crates/mun_syntax/src/ast/expr_extensions.rs +++ b/crates/mun_syntax/src/ast/expr_extensions.rs @@ -1,10 +1,12 @@ use super::{children, BinExpr}; use crate::ast::{child_opt, AstChildren, Literal}; use crate::{ - ast, AstNode, + ast, AstNode, SmolStr, SyntaxKind::{self, *}, SyntaxToken, TextRange, TextUnit, }; +use std::iter::Peekable; +use std::str::CharIndices; #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum PrefixOp { @@ -174,6 +176,103 @@ impl Literal { _ => unreachable!(), } } + + pub fn text_and_suffix(&self) -> (SmolStr, Option) { + let token = self.token(); + let text = token.text(); + match self.kind() { + LiteralKind::String => (text.clone(), None), + LiteralKind::IntNumber => { + let (str, suffix) = 
/// Given a string containing an integer literal (e.g `0x123` or `1234u32`), split the string in the
/// value part and the suffix part.
///
/// The value part keeps its base specifier and any `_` separators. Decimal digits that are not
/// valid for the literal's base (e.g. the `2` in `0b102`) stay in the value part so that the
/// malformed number can be reported as such instead of being mistaken for a suffix.
fn split_int_text_and_suffix(text: &str) -> (&str, Option<&str>) {
    let base = match text.as_bytes() {
        [b'0', b'x', ..] => 16,
        [b'0', b'o', ..] => 8,
        [b'0', b'b', ..] => 2,
        _ => 10,
    };

    let mut iter = text.char_indices().peekable();

    // Skip base specifier
    if base != 10 {
        iter.next();
        iter.next();
    }

    // Skip digits in the string
    skip_digits(base, &mut iter);

    // Whatever follows the digits is the suffix
    match iter.next() {
        Some((idx, _)) => (&text[..idx], Some(&text[idx..])),
        None => (text, None),
    }
}

/// Advances the iterator past all characters that belong to the digits of a number in the given
/// base.
///
/// All decimal digits and `_` separators are skipped regardless of the base; the letters
/// `a`-`f` / `A`-`F` are only skipped for bases larger than 10. Whether a decimal digit is
/// actually valid for the base is not checked here.
fn skip_digits(base: usize, iter: &mut Peekable<CharIndices>) {
    while let Some(&(_, c)) = iter.peek() {
        let is_digit = match c {
            '0'..='9' | '_' => true,
            'a'..='f' | 'A'..='F' => base > 10,
            _ => false,
        };
        if !is_digit {
            break;
        }
        iter.next();
    }
}

/// Given a string containing a float literal (e.g `123.4` or `1234.4f32`), split the string in the
/// value part and the suffix part.
///
/// An `e`/`E` is only treated as the start of an exponent when at least one digit follows it
/// (optionally after a sign). Otherwise it is left to the suffix, so the value part never ends
/// in a dangling exponent marker that could not be parsed as a number.
fn split_float_text_and_suffix(text: &str) -> (&str, Option<&str>) {
    let mut iter = text.char_indices().peekable();
    skip_digits(10, &mut iter);

    // Continue after a decimal separator
    if let Some(&(_, '.')) = iter.peek() {
        iter.next();
        skip_digits(10, &mut iter);
    }

    // Continue after exponent
    if let Some(&(exp_start, c)) = iter.peek() {
        if c == 'e' || c == 'E' {
            // Scan the exponent on a copy of the iterator; it is only committed when the
            // exponent turns out to contain a digit.
            let mut lookahead = iter.clone();
            lookahead.next();

            if let Some(&(_, sign)) = lookahead.peek() {
                if sign == '-' || sign == '+' {
                    lookahead.next();
                }
            }

            skip_digits(10, &mut lookahead);

            let exp_end = lookahead.peek().map_or(text.len(), |&(idx, _)| idx);
            if text[exp_start..exp_end].bytes().any(|b| b.is_ascii_digit()) {
                iter = lookahead;
            }
        }
    }

    match iter.next() {
        Some((idx, _)) => (&text[..idx], Some(&text[idx..])),
        None => (text, None),
    }
}
split_float_text_and_suffix("123E-10f32"), + ("123E-10", Some("f32")) + ); + assert_eq!( + split_float_text_and_suffix("123.123E10f32"), + ("123.123E10", Some("f32")) + ); + } +} diff --git a/crates/mun_syntax/src/parsing/lexer/numbers.rs b/crates/mun_syntax/src/parsing/lexer/numbers.rs index 789d8876b..2a53605de 100644 --- a/crates/mun_syntax/src/parsing/lexer/numbers.rs +++ b/crates/mun_syntax/src/parsing/lexer/numbers.rs @@ -14,7 +14,7 @@ pub(crate) fn scan_number(c: char, cursor: &mut Cursor) -> SyntaxKind { scan_digits(cursor, true); } '0'..='9' | '_' | '.' | 'e' | 'E' => { - scan_digits(cursor, true); + scan_digits(cursor, false); } _ => return INT_NUMBER, } @@ -28,17 +28,27 @@ pub(crate) fn scan_number(c: char, cursor: &mut Cursor) -> SyntaxKind { cursor.bump(); scan_digits(cursor, false); scan_float_exponent(cursor); + scan_suffix(cursor); return FLOAT_NUMBER; } if cursor.matches('e') || cursor.matches('E') { scan_float_exponent(cursor); + scan_suffix(cursor); return FLOAT_NUMBER; } + scan_suffix(cursor); INT_NUMBER } +fn scan_suffix(cursor: &mut Cursor) { + if cursor.matches_nth_if(0, is_ident_start) { + cursor.bump(); + cursor.bump_while(is_ident_continue); + } +} + fn scan_digits(cursor: &mut Cursor, allow_hex: bool) { while let Some(c) = cursor.current() { match c { diff --git a/crates/mun_syntax/src/tests/lexer.rs b/crates/mun_syntax/src/tests/lexer.rs index 35e3cf712..cb3f2ac4c 100644 --- a/crates/mun_syntax/src/tests/lexer.rs +++ b/crates/mun_syntax/src/tests/lexer.rs @@ -30,7 +30,9 @@ fn numbers() { 1.34 0x3Af 1e-3 - 100_000"#, + 100_000 + 0x3a_u32 + 1f32"#, ) } diff --git a/crates/mun_syntax/src/tests/snapshots/lexer__numbers.snap b/crates/mun_syntax/src/tests/snapshots/lexer__numbers.snap index 04dfdedd8..8c9e54808 100644 --- a/crates/mun_syntax/src/tests/snapshots/lexer__numbers.snap +++ b/crates/mun_syntax/src/tests/snapshots/lexer__numbers.snap @@ -1,6 +1,6 @@ --- source: crates/mun_syntax/src/tests/lexer.rs -expression: 
"1.34\n0x3Af\n1e-3\n100_000" +expression: "1.34\n0x3Af\n1e-3\n100_000\n0x3a_u32\n1f32" --- FLOAT_NUMBER 4 "1.34" WHITESPACE 1 "\n" @@ -9,4 +9,8 @@ WHITESPACE 1 "\n" FLOAT_NUMBER 4 "1e-3" WHITESPACE 1 "\n" INT_NUMBER 7 "100_000" +WHITESPACE 1 "\n" +INT_NUMBER 8 "0x3a_u32" +WHITESPACE 1 "\n" +INT_NUMBER 4 "1f32"