Parse number literal width suffixes

Supports integer suffixes [u,i][8,16,32,64,128] and [nat,dec], plus float suffixes f[32,64]. Part of #2350.
2025-09-29 06:44:46 +00:00 · 2022-01-31 00:30:15 -05:00 · 2022-01-31 00:30:15 -05:00 · 320827167f
commit 320827167f
parent 545882f210
112 changed files with 1159 additions and 127 deletions
--- a/compiler/parse/src/ast.rs
+++ b/compiler/parse/src/ast.rs
@ -1,4 +1,4 @@
-use std::fmt::Debug;
+use std::fmt::{Debug, Display};

 use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PlatformHeader};
 use crate::ident::Ident;
@ -126,6 +126,70 @@ pub enum StrLiteral<'a> {
    Block(&'a [&'a [StrSegment<'a>]]),
 }

+#[derive(Clone, Copy, PartialEq, Debug)]
+pub enum NumWidth { // width suffixes accepted after an integer literal (decimal or non-base-10)
+    U8, // suffix `u8`
+    U16, // suffix `u16`
+    U32, // suffix `u32`
+    U64, // suffix `u64`
+    U128, // suffix `u128`
+    I8, // suffix `i8`
+    I16, // suffix `i16`
+    I32, // suffix `i32`
+    I64, // suffix `i64`
+    I128, // suffix `i128`
+    Nat, // suffix `nat`
+    Dec, // suffix `dec`; NOTE(review): grouped with the integer widths, so it is not matched on float literals - confirm intended
+}
+
+impl Display for NumWidth { // renders each width as its lowercase suffix text
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use NumWidth::*; // bring the variants into scope so the arms below stay short
+        f.write_str(match self { // each string is the same text `get_int_suffix` matches
+            U8 => "u8",
+            U16 => "u16",
+            U32 => "u32",
+            U64 => "u64",
+            U128 => "u128",
+            I8 => "i8",
+            I16 => "i16",
+            I32 => "i32",
+            I64 => "i64",
+            I128 => "i128",
+            Nat => "nat",
+            Dec => "dec",
+        })
+    }
+}
+
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub enum FloatWidth { // width suffixes accepted after a float literal
+    F32, // suffix `f32`
+    F64, // suffix `f64`
+}
+
+impl Display for FloatWidth { // renders each width as its lowercase suffix text
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use FloatWidth::*; // bring the variants into scope so the arms below stay short
+        f.write_str(match self { // each string is the same text `get_float_suffix` matches
+            F32 => "f32",
+            F64 => "f64",
+        })
+    }
+}
+
+/// Describes a bound on the width of a numeric literal, as given by an optional width suffix.
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub enum NumericBound<W>
+where
+    W: Copy, // instantiated with `NumWidth` and `FloatWidth` by the literal AST nodes
+{
+    /// There is no bound on the width; used when a literal carries no width suffix.
+    None,
+    /// Must have exactly the width `W` named by the literal's suffix.
+    Exact(W),
+}
+
 /// A parsed expression. This uses lifetimes extensively for two reasons:
 ///
 /// 1. It uses Bump::alloc for all allocations, which returns a reference.
@ -138,12 +202,13 @@ pub enum StrLiteral<'a> {
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub enum Expr<'a> {
    // Number Literals
-    Float(&'a str),
-    Num(&'a str),
+    Float(&'a str, NumericBound<FloatWidth>),
+    Num(&'a str, NumericBound<NumWidth>),
    NonBase10Int {
        string: &'a str,
        base: Base,
        is_negative: bool,
+        bound: NumericBound<NumWidth>,
    },

    // String Literals
@ -431,13 +496,14 @@ pub enum Pattern<'a> {
    OptionalField(&'a str, &'a Loc<Expr<'a>>),

    // Literal
-    NumLiteral(&'a str),
+    NumLiteral(&'a str, NumericBound<NumWidth>),
    NonBase10Literal {
        string: &'a str,
        base: Base,
        is_negative: bool,
+        bound: NumericBound<NumWidth>,
    },
-    FloatLiteral(&'a str),
+    FloatLiteral(&'a str, NumericBound<FloatWidth>),
    StrLiteral(StrLiteral<'a>),
    Underscore(&'a str),

@ -540,20 +606,27 @@ impl<'a> Pattern<'a> {
                x == y
            }
            // Literal
-            (NumLiteral(x), NumLiteral(y)) => x == y,
+            (NumLiteral(x, bound_x), NumLiteral(y, bound_y)) => x == y && bound_x == bound_y,
            (
                NonBase10Literal {
                    string: string_x,
                    base: base_x,
                    is_negative: is_negative_x,
+                    bound: bound_x,
                },
                NonBase10Literal {
                    string: string_y,
                    base: base_y,
                    is_negative: is_negative_y,
+                    bound: bound_y,
                },
-            ) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
-            (FloatLiteral(x), FloatLiteral(y)) => x == y,
+            ) => {
+                string_x == string_y
+                    && base_x == base_y
+                    && is_negative_x == is_negative_y
+                    && bound_x == bound_y
+            }
+            (FloatLiteral(x, bound_x), FloatLiteral(y, bound_y)) => x == y && bound_x == bound_y,
            (StrLiteral(x), StrLiteral(y)) => x == y,
            (Underscore(x), Underscore(y)) => x == y,

--- a/compiler/parse/src/expr.rs
+++ b/compiler/parse/src/expr.rs
@ -1,6 +1,6 @@
 use crate::ast::{
-    AliasHeader, AssignedField, Collection, CommentOrNewline, Def, Expr, ExtractSpaces, Pattern,
-    Spaceable, TypeAnnotation,
+    AliasHeader, AssignedField, Collection, CommentOrNewline, Def, Expr, ExtractSpaces,
+    NumericBound, Pattern, Spaceable, TypeAnnotation,
 };
 use crate::blankspace::{space0_after_e, space0_around_ee, space0_before_e, space0_e};
 use crate::ident::{lowercase_ident, parse_ident, Ident};
@ -377,7 +377,7 @@ impl<'a> ExprState<'a> {
            } else {
                let region = self.expr.region;

-                let mut value = Expr::Num("");
+                let mut value = Expr::Num("", NumericBound::None);
                std::mem::swap(&mut self.expr.value, &mut value);

                self.expr = arena
@ -515,28 +515,30 @@ fn numeric_negate_expression<'a, T>(
    let region = Region::new(start, expr.region.end());

    let new_expr = match &expr.value {
-        Expr::Num(string) => {
+        &Expr::Num(string, bound) => {
            let new_string =
                unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };

-            Expr::Num(new_string)
+            Expr::Num(new_string, bound)
        }
-        Expr::Float(string) => {
+        &Expr::Float(string, bound) => {
            let new_string =
                unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };

-            Expr::Float(new_string)
+            Expr::Float(new_string, bound)
        }
-        Expr::NonBase10Int {
+        &Expr::NonBase10Int {
            string,
            base,
            is_negative,
+            bound,
        } => {
            // don't include the minus sign here; it will not be parsed right
            Expr::NonBase10Int {
                is_negative: !is_negative,
                string,
-                base: *base,
+                base,
+                bound,
            }
        }
        _ => Expr::UnaryOp(arena.alloc(expr), Loc::at(loc_op.region, UnaryOp::Negate)),
@ -1450,16 +1452,18 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
            Ok(Pattern::RecordDestructure(patterns))
        }

-        Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
-        Expr::Num(string) => Ok(Pattern::NumLiteral(string)),
+        &Expr::Float(string, bound) => Ok(Pattern::FloatLiteral(string, bound)),
+        &Expr::Num(string, bound) => Ok(Pattern::NumLiteral(string, bound)),
        Expr::NonBase10Int {
            string,
            base,
            is_negative,
+            bound,
        } => Ok(Pattern::NonBase10Literal {
            string,
            base: *base,
            is_negative: *is_negative,
+            bound: *bound,
        }),
        // These would not have parsed as patterns
        Expr::AccessorFunction(_)
@ -2319,16 +2323,18 @@ fn positive_number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
            use crate::number_literal::NumLiteral::*;

            match literal {
-                Num(s) => Expr::Num(s),
-                Float(s) => Expr::Float(s),
+                Num(s, bound) => Expr::Num(s, bound),
+                Float(s, bound) => Expr::Float(s, bound),
                NonBase10Int {
                    string,
                    base,
                    is_negative,
+                    bound,
                } => Expr::NonBase10Int {
                    string,
                    base,
                    is_negative,
+                    bound,
                },
            }
        }
@ -2340,16 +2346,18 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
        use crate::number_literal::NumLiteral::*;

        match literal {
-            Num(s) => Expr::Num(s),
-            Float(s) => Expr::Float(s),
+            Num(s, bound) => Expr::Num(s, bound),
+            Float(s, bound) => Expr::Float(s, bound),
            NonBase10Int {
                string,
                base,
                is_negative,
+                bound,
            } => Expr::NonBase10Int {
                string,
                base,
                is_negative,
+                bound,
            },
        }
    })
--- a/compiler/parse/src/number_literal.rs
+++ b/compiler/parse/src/number_literal.rs
@ -1,14 +1,16 @@
-use crate::ast::Base;
+use crate::ast::{Base, FloatWidth, NumWidth, NumericBound};
 use crate::parser::{ENumber, ParseResult, Parser, Progress};
 use crate::state::State;

+#[derive(Debug, Copy, Clone)]
 pub enum NumLiteral<'a> {
-    Float(&'a str),
-    Num(&'a str),
+    Float(&'a str, NumericBound<FloatWidth>), // literal text plus the bound from an `f32`/`f64` suffix, if any
+    Num(&'a str, NumericBound<NumWidth>), // literal text plus the bound from an integer-width suffix, if any
    NonBase10Int {
        string: &'a str,
        base: Base,
        is_negative: bool,
+        bound: NumericBound<NumWidth>, // bound from an integer-width suffix, if any
    },
 }

@ -49,14 +51,104 @@ fn parse_number_base<'a>(
    bytes: &'a [u8],
    state: State<'a>,
 ) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
-    match bytes.get(0..2) {
+    let number = match bytes.get(0..2) {
        Some(b"0b") => chomp_number_base(Base::Binary, is_negated, &bytes[2..], state),
        Some(b"0o") => chomp_number_base(Base::Octal, is_negated, &bytes[2..], state),
        Some(b"0x") => chomp_number_base(Base::Hex, is_negated, &bytes[2..], state),
        _ => chomp_number_dec(is_negated, bytes, state),
+    };
+    number.and_then(|(_, literal, state)| parse_number_suffix(literal, state))
+}
+
+fn parse_number_suffix<'a>( // looks for an optional width suffix after an already-chomped literal and records it as the bound
+    literal: NumLiteral<'a>, // literal from the digit chomper; whatever bound it carries is discarded and recomputed
+    state: State<'a>, // state returned by the chomper; `state.bytes()` is scanned for the suffix
+) -> ParseResult<'a, NumLiteral<'a>, ENumber> { // every arm returns `Ok` with `Progress::MadeProgress`
+    match literal {
+        NumLiteral::Float(s, _) => { // float literals only accept the float suffixes
+            let (bound, state) = match get_float_suffix(state.bytes()) {
+                Some((bound, n)) => (NumericBound::Exact(bound), state.advance(n)), // `n` is the suffix length; assumes `advance` skips that many bytes
+                None => (NumericBound::None, state), // no suffix: state is passed through unchanged
+            };
+            Ok((Progress::MadeProgress, NumLiteral::Float(s, bound), state))
+        }
+        NumLiteral::Num(s, _) => { // base-10 integers accept the integer suffixes (including `nat` and `dec`)
+            let (bound, state) = match get_int_suffix(state.bytes()) {
+                Some((bound, n)) => (NumericBound::Exact(bound), state.advance(n)),
+                None => (NumericBound::None, state),
+            };
+            Ok((Progress::MadeProgress, NumLiteral::Num(s, bound), state))
+        }
+        NumLiteral::NonBase10Int { // binary/octal/hex integers use the same suffix set as `Num`
+            string,
+            base,
+            is_negative,
+            bound: _, // ignored; recomputed from the suffix below
+        } => {
+            let (bound, state) = match get_int_suffix(state.bytes()) {
+                Some((bound, n)) => (NumericBound::Exact(bound), state.advance(n)),
+                None => (NumericBound::None, state),
+            };
+            Ok((
+                Progress::MadeProgress,
+                NumLiteral::NonBase10Int {
+                    string,
+                    base,
+                    is_negative,
+                    bound,
+                },
+                state,
+            ))
+        }
    }
 }

+macro_rules! parse_num_suffix { // expands to one suffix check per `suffix, width` pair, tried in the order written
+    ($bytes:expr, $($suffix:expr, $width:expr)*) => { // `$bytes`: input slice; then repeated `suffix, width` pairs
+        $(
+            {
+                let len = $suffix.len(); // suffix length in bytes; also the second element of the returned tuple
+                if $bytes.starts_with($suffix)
+                    && { // ...and the suffix must end at end of input or at a byte that is not an ASCII letter or digit
+                        let next = $bytes[len..].get(0); // byte right after the suffix, if any
+                        match next { Some(c) => !(c.is_ascii_digit() || c.is_ascii_alphabetic()), None => true, }
+                    }
+                {
+                    return Some(($width, len)) // `return` exits the function the macro is expanded in
+                }
+            }
+        )*
+    }
+}
+
+fn get_int_suffix<'a>(bytes: &'a [u8]) -> Option<(NumWidth, usize)> { // integer width suffix at the start of `bytes`, with its byte length
+    parse_num_suffix! { // returns `Some((width, len))` from this function on the first suffix that matches
+        bytes,
+        b"u8", NumWidth::U8
+        b"u16", NumWidth::U16
+        b"u32", NumWidth::U32
+        b"u64", NumWidth::U64
+        b"u128", NumWidth::U128
+        b"i8", NumWidth::I8
+        b"i16", NumWidth::I16
+        b"i32", NumWidth::I32
+        b"i64", NumWidth::I64
+        b"i128", NumWidth::I128
+        b"nat", NumWidth::Nat
+        b"dec", NumWidth::Dec
+    }
+    None // reached only when no suffix matched
+}
+
+fn get_float_suffix<'a>(bytes: &'a [u8]) -> Option<(FloatWidth, usize)> { // float width suffix at the start of `bytes`, with its byte length
+    parse_num_suffix! { // returns `Some((width, len))` from this function on the first suffix that matches
+        bytes,
+        b"f32", FloatWidth::F32
+        b"f64", FloatWidth::F64
+    }
+    None // reached only when no suffix matched
+}
+
 fn chomp_number_base<'a>(
    base: Base,
    is_negative: bool,
@ -75,6 +167,7 @@ fn chomp_number_base<'a>(
            is_negative,
            string,
            base,
+            bound: NumericBound::None,
        },
        new,
    ))
@ -105,9 +198,9 @@ fn chomp_number_dec<'a>(
    Ok((
        Progress::MadeProgress,
        if is_float {
-            NumLiteral::Float(string)
+            NumLiteral::Float(string, NumericBound::None)
        } else {
-            NumLiteral::Num(string)
+            NumLiteral::Num(string, NumericBound::None)
        },
        new,
    ))
@ -144,8 +237,7 @@ fn chomp_number(mut bytes: &[u8]) -> (bool, usize) {
                // skip
                bytes = &bytes[1..];
            }
-            _ if byte.is_ascii_digit() || byte.is_ascii_alphabetic() => {
-                // valid digits (alphabetic in hex digits, and the `e` in `12e26` scientific notation
+            _ if byte.is_ascii_digit() => {
                bytes = &bytes[1..];
            }
            _ => {
--- a/compiler/parse/src/pattern.rs
+++ b/compiler/parse/src/pattern.rs
@ -138,16 +138,18 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
            use crate::number_literal::NumLiteral::*;

            match literal {
-                Num(s) => Pattern::NumLiteral(s),
-                Float(s) => Pattern::FloatLiteral(s),
+                Num(s, bound) => Pattern::NumLiteral(s, bound),
+                Float(s, bound) => Pattern::FloatLiteral(s, bound),
                NonBase10Int {
                    string,
                    base,
                    is_negative,
+                    bound,
                } => Pattern::NonBase10Literal {
                    string,
                    base,
                    is_negative,
+                    bound,
                },
            }
        }),