mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 06:44:46 +00:00
Parse number literal width suffixes
Supports the integer suffixes [u,i][8,16,32,64,128] as well as [nat,dec]. Part of #2350.
This commit is contained in:
parent
545882f210
commit
320827167f
112 changed files with 1159 additions and 127 deletions
|
@ -1,4 +1,4 @@
|
|||
use std::fmt::Debug;
|
||||
use std::fmt::{Debug, Display};
|
||||
|
||||
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PlatformHeader};
|
||||
use crate::ident::Ident;
|
||||
|
@ -126,6 +126,70 @@ pub enum StrLiteral<'a> {
|
|||
Block(&'a [&'a [StrSegment<'a>]]),
|
||||
}
|
||||
|
||||
/// The explicit width suffix that can be attached to an integer literal,
/// such as the `u8` in `1u8`.
///
/// The textual spelling of each variant is produced by the
/// [`std::fmt::Display`] implementation below.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum NumWidth {
    U8,
    U16,
    U32,
    U64,
    U128,
    I8,
    I16,
    I32,
    I64,
    I128,
    Nat,
    Dec,
}

impl std::fmt::Display for NumWidth {
    /// Writes the lowercase suffix spelling of this width (`"u8"`, `"i128"`,
    /// `"nat"`, `"dec"`, ...) to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let suffix = match self {
            Self::U8 => "u8",
            Self::U16 => "u16",
            Self::U32 => "u32",
            Self::U64 => "u64",
            Self::U128 => "u128",
            Self::I8 => "i8",
            Self::I16 => "i16",
            Self::I32 => "i32",
            Self::I64 => "i64",
            Self::I128 => "i128",
            Self::Nat => "nat",
            Self::Dec => "dec",
        };

        f.write_str(suffix)
    }
}
|
||||
|
||||
/// The explicit width suffix that can be attached to a float literal,
/// such as the `f32` in `1.5f32`.
///
/// The textual spelling of each variant is produced by the
/// [`std::fmt::Display`] implementation below.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum FloatWidth {
    F32,
    F64,
}

impl std::fmt::Display for FloatWidth {
    /// Writes the lowercase suffix spelling of this width (`"f32"` or `"f64"`)
    /// to the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let suffix = match self {
            Self::F32 => "f32",
            Self::F64 => "f64",
        };

        f.write_str(suffix)
    }
}
|
||||
|
||||
/// Describes a bound on the width of a numeric literal.
///
/// `W` is the type used to name a width; the literal variants in this file
/// instantiate it with `NumWidth` for integers and `FloatWidth` for floats.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum NumericBound<W>
where
    W: Copy,
{
    /// There is no bound on the width.
    None,
    /// Must have exactly the width `W`.
    Exact(W),
}
|
||||
|
||||
/// A parsed expression. This uses lifetimes extensively for two reasons:
|
||||
///
|
||||
/// 1. It uses Bump::alloc for all allocations, which returns a reference.
|
||||
|
@ -138,12 +202,13 @@ pub enum StrLiteral<'a> {
|
|||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub enum Expr<'a> {
|
||||
// Number Literals
|
||||
Float(&'a str),
|
||||
Num(&'a str),
|
||||
Float(&'a str, NumericBound<FloatWidth>),
|
||||
Num(&'a str, NumericBound<NumWidth>),
|
||||
NonBase10Int {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bound: NumericBound<NumWidth>,
|
||||
},
|
||||
|
||||
// String Literals
|
||||
|
@ -431,13 +496,14 @@ pub enum Pattern<'a> {
|
|||
OptionalField(&'a str, &'a Loc<Expr<'a>>),
|
||||
|
||||
// Literal
|
||||
NumLiteral(&'a str),
|
||||
NumLiteral(&'a str, NumericBound<NumWidth>),
|
||||
NonBase10Literal {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bound: NumericBound<NumWidth>,
|
||||
},
|
||||
FloatLiteral(&'a str),
|
||||
FloatLiteral(&'a str, NumericBound<FloatWidth>),
|
||||
StrLiteral(StrLiteral<'a>),
|
||||
Underscore(&'a str),
|
||||
|
||||
|
@ -540,20 +606,27 @@ impl<'a> Pattern<'a> {
|
|||
x == y
|
||||
}
|
||||
// Literal
|
||||
(NumLiteral(x), NumLiteral(y)) => x == y,
|
||||
(NumLiteral(x, bound_x), NumLiteral(y, bound_y)) => x == y && bound_x == bound_y,
|
||||
(
|
||||
NonBase10Literal {
|
||||
string: string_x,
|
||||
base: base_x,
|
||||
is_negative: is_negative_x,
|
||||
bound: bound_x,
|
||||
},
|
||||
NonBase10Literal {
|
||||
string: string_y,
|
||||
base: base_y,
|
||||
is_negative: is_negative_y,
|
||||
bound: bound_y,
|
||||
},
|
||||
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
|
||||
(FloatLiteral(x), FloatLiteral(y)) => x == y,
|
||||
) => {
|
||||
string_x == string_y
|
||||
&& base_x == base_y
|
||||
&& is_negative_x == is_negative_y
|
||||
&& bound_x == bound_y
|
||||
}
|
||||
(FloatLiteral(x, bound_x), FloatLiteral(y, bound_y)) => x == y && bound_x == bound_y,
|
||||
(StrLiteral(x), StrLiteral(y)) => x == y,
|
||||
(Underscore(x), Underscore(y)) => x == y,
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use crate::ast::{
|
||||
AliasHeader, AssignedField, Collection, CommentOrNewline, Def, Expr, ExtractSpaces, Pattern,
|
||||
Spaceable, TypeAnnotation,
|
||||
AliasHeader, AssignedField, Collection, CommentOrNewline, Def, Expr, ExtractSpaces,
|
||||
NumericBound, Pattern, Spaceable, TypeAnnotation,
|
||||
};
|
||||
use crate::blankspace::{space0_after_e, space0_around_ee, space0_before_e, space0_e};
|
||||
use crate::ident::{lowercase_ident, parse_ident, Ident};
|
||||
|
@ -377,7 +377,7 @@ impl<'a> ExprState<'a> {
|
|||
} else {
|
||||
let region = self.expr.region;
|
||||
|
||||
let mut value = Expr::Num("");
|
||||
let mut value = Expr::Num("", NumericBound::None);
|
||||
std::mem::swap(&mut self.expr.value, &mut value);
|
||||
|
||||
self.expr = arena
|
||||
|
@ -515,28 +515,30 @@ fn numeric_negate_expression<'a, T>(
|
|||
let region = Region::new(start, expr.region.end());
|
||||
|
||||
let new_expr = match &expr.value {
|
||||
Expr::Num(string) => {
|
||||
&Expr::Num(string, bound) => {
|
||||
let new_string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
|
||||
|
||||
Expr::Num(new_string)
|
||||
Expr::Num(new_string, bound)
|
||||
}
|
||||
Expr::Float(string) => {
|
||||
&Expr::Float(string, bound) => {
|
||||
let new_string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
|
||||
|
||||
Expr::Float(new_string)
|
||||
Expr::Float(new_string, bound)
|
||||
}
|
||||
Expr::NonBase10Int {
|
||||
&Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => {
|
||||
// don't include the minus sign here; it will not be parsed right
|
||||
Expr::NonBase10Int {
|
||||
is_negative: !is_negative,
|
||||
string,
|
||||
base: *base,
|
||||
base,
|
||||
bound,
|
||||
}
|
||||
}
|
||||
_ => Expr::UnaryOp(arena.alloc(expr), Loc::at(loc_op.region, UnaryOp::Negate)),
|
||||
|
@ -1450,16 +1452,18 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
|
|||
Ok(Pattern::RecordDestructure(patterns))
|
||||
}
|
||||
|
||||
Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
|
||||
Expr::Num(string) => Ok(Pattern::NumLiteral(string)),
|
||||
&Expr::Float(string, bound) => Ok(Pattern::FloatLiteral(string, bound)),
|
||||
&Expr::Num(string, bound) => Ok(Pattern::NumLiteral(string, bound)),
|
||||
Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Ok(Pattern::NonBase10Literal {
|
||||
string,
|
||||
base: *base,
|
||||
is_negative: *is_negative,
|
||||
bound: *bound,
|
||||
}),
|
||||
// These would not have parsed as patterns
|
||||
Expr::AccessorFunction(_)
|
||||
|
@ -2319,16 +2323,18 @@ fn positive_number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
|
|||
use crate::number_literal::NumLiteral::*;
|
||||
|
||||
match literal {
|
||||
Num(s) => Expr::Num(s),
|
||||
Float(s) => Expr::Float(s),
|
||||
Num(s, bound) => Expr::Num(s, bound),
|
||||
Float(s, bound) => Expr::Float(s, bound),
|
||||
NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -2340,16 +2346,18 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
|
|||
use crate::number_literal::NumLiteral::*;
|
||||
|
||||
match literal {
|
||||
Num(s) => Expr::Num(s),
|
||||
Float(s) => Expr::Float(s),
|
||||
Num(s, bound) => Expr::Num(s, bound),
|
||||
Float(s, bound) => Expr::Float(s, bound),
|
||||
NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
}
|
||||
})
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
use crate::ast::Base;
|
||||
use crate::ast::{Base, FloatWidth, NumWidth, NumericBound};
|
||||
use crate::parser::{ENumber, ParseResult, Parser, Progress};
|
||||
use crate::state::State;
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub enum NumLiteral<'a> {
|
||||
Float(&'a str),
|
||||
Num(&'a str),
|
||||
Float(&'a str, NumericBound<FloatWidth>),
|
||||
Num(&'a str, NumericBound<NumWidth>),
|
||||
NonBase10Int {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bound: NumericBound<NumWidth>,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -49,14 +51,104 @@ fn parse_number_base<'a>(
|
|||
bytes: &'a [u8],
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
|
||||
match bytes.get(0..2) {
|
||||
let number = match bytes.get(0..2) {
|
||||
Some(b"0b") => chomp_number_base(Base::Binary, is_negated, &bytes[2..], state),
|
||||
Some(b"0o") => chomp_number_base(Base::Octal, is_negated, &bytes[2..], state),
|
||||
Some(b"0x") => chomp_number_base(Base::Hex, is_negated, &bytes[2..], state),
|
||||
_ => chomp_number_dec(is_negated, bytes, state),
|
||||
};
|
||||
number.and_then(|(_, literal, state)| parse_number_suffix(literal, state))
|
||||
}
|
||||
|
||||
fn parse_number_suffix<'a>(
|
||||
literal: NumLiteral<'a>,
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
|
||||
match literal {
|
||||
NumLiteral::Float(s, _) => {
|
||||
let (bound, state) = match get_float_suffix(state.bytes()) {
|
||||
Some((bound, n)) => (NumericBound::Exact(bound), state.advance(n)),
|
||||
None => (NumericBound::None, state),
|
||||
};
|
||||
Ok((Progress::MadeProgress, NumLiteral::Float(s, bound), state))
|
||||
}
|
||||
NumLiteral::Num(s, _) => {
|
||||
let (bound, state) = match get_int_suffix(state.bytes()) {
|
||||
Some((bound, n)) => (NumericBound::Exact(bound), state.advance(n)),
|
||||
None => (NumericBound::None, state),
|
||||
};
|
||||
Ok((Progress::MadeProgress, NumLiteral::Num(s, bound), state))
|
||||
}
|
||||
NumLiteral::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound: _,
|
||||
} => {
|
||||
let (bound, state) = match get_int_suffix(state.bytes()) {
|
||||
Some((bound, n)) => (NumericBound::Exact(bound), state.advance(n)),
|
||||
None => (NumericBound::None, state),
|
||||
};
|
||||
Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
state,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Expands to a sequence of suffix checks against `$bytes`.
///
/// For each `suffix, width` pair, in the order given: if `$bytes` starts with
/// `suffix` and the byte right after it is absent or is not an ASCII letter or
/// digit, the *enclosing function* returns `Some((width, suffix.len()))`.
/// If no pair matches, the expansion falls through, so the caller supplies the
/// fallback value (e.g. a trailing `None`).
///
/// Pairs are written back to back with no separator between them:
///
/// ```ignore
/// parse_num_suffix! {
///     bytes,
///     b"u8", NumWidth::U8
///     b"u16", NumWidth::U16
/// }
/// None
/// ```
macro_rules! parse_num_suffix {
    ($bytes:expr, $($suffix:expr, $width:expr)*) => {
        $(
            {
                let len = $suffix.len();
                // Reject a suffix that is merely the start of a longer
                // alphanumeric run, e.g. `u8` inside `u8x` or `u80`.
                if $bytes.starts_with($suffix)
                    && $bytes
                        .get(len)
                        .map_or(true, |next| !next.is_ascii_alphanumeric())
                {
                    return Some(($width, len));
                }
            }
        )*
    };
}
|
||||
|
||||
fn get_int_suffix<'a>(bytes: &'a [u8]) -> Option<(NumWidth, usize)> {
|
||||
parse_num_suffix! {
|
||||
bytes,
|
||||
b"u8", NumWidth::U8
|
||||
b"u16", NumWidth::U16
|
||||
b"u32", NumWidth::U32
|
||||
b"u64", NumWidth::U64
|
||||
b"u128", NumWidth::U128
|
||||
b"i8", NumWidth::I8
|
||||
b"i16", NumWidth::I16
|
||||
b"i32", NumWidth::I32
|
||||
b"i64", NumWidth::I64
|
||||
b"i128", NumWidth::I128
|
||||
b"nat", NumWidth::Nat
|
||||
b"dec", NumWidth::Dec
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn get_float_suffix<'a>(bytes: &'a [u8]) -> Option<(FloatWidth, usize)> {
|
||||
parse_num_suffix! {
|
||||
bytes,
|
||||
b"f32", FloatWidth::F32
|
||||
b"f64", FloatWidth::F64
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn chomp_number_base<'a>(
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
|
@ -75,6 +167,7 @@ fn chomp_number_base<'a>(
|
|||
is_negative,
|
||||
string,
|
||||
base,
|
||||
bound: NumericBound::None,
|
||||
},
|
||||
new,
|
||||
))
|
||||
|
@ -105,9 +198,9 @@ fn chomp_number_dec<'a>(
|
|||
Ok((
|
||||
Progress::MadeProgress,
|
||||
if is_float {
|
||||
NumLiteral::Float(string)
|
||||
NumLiteral::Float(string, NumericBound::None)
|
||||
} else {
|
||||
NumLiteral::Num(string)
|
||||
NumLiteral::Num(string, NumericBound::None)
|
||||
},
|
||||
new,
|
||||
))
|
||||
|
@ -144,8 +237,7 @@ fn chomp_number(mut bytes: &[u8]) -> (bool, usize) {
|
|||
// skip
|
||||
bytes = &bytes[1..];
|
||||
}
|
||||
_ if byte.is_ascii_digit() || byte.is_ascii_alphabetic() => {
|
||||
// valid digits (alphabetic in hex digits, and the `e` in `12e26` scientific notation
|
||||
_ if byte.is_ascii_digit() => {
|
||||
bytes = &bytes[1..];
|
||||
}
|
||||
_ => {
|
||||
|
|
|
@ -138,16 +138,18 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
|||
use crate::number_literal::NumLiteral::*;
|
||||
|
||||
match literal {
|
||||
Num(s) => Pattern::NumLiteral(s),
|
||||
Float(s) => Pattern::FloatLiteral(s),
|
||||
Num(s, bound) => Pattern::NumLiteral(s, bound),
|
||||
Float(s, bound) => Pattern::FloatLiteral(s, bound),
|
||||
NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Pattern::NonBase10Literal {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
}
|
||||
}),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue