Parse number literal width suffixes

Supports [u,i][8,16,32,64,128] and [nat,dec]

Part of #2350
This commit is contained in:
ayazhafiz 2022-01-31 00:30:15 -05:00
parent 545882f210
commit 320827167f
112 changed files with 1159 additions and 127 deletions

View file

@ -1,4 +1,4 @@
use std::fmt::Debug;
use std::fmt::{Debug, Display};
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PlatformHeader};
use crate::ident::Ident;
@ -126,6 +126,70 @@ pub enum StrLiteral<'a> {
Block(&'a [&'a [StrSegment<'a>]]),
}
/// The width names that can be attached to a number literal as a suffix.
///
/// Each variant renders through its `Display` impl as the lowercase form of
/// its name (`U8` as `u8`, `Nat` as `nat`, and so on).
///
/// The enum is fieldless, so it derives `Eq` and `Hash` in addition to
/// `PartialEq`; this lets widths be used as set members and map keys.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum NumWidth {
    U8,
    U16,
    U32,
    U64,
    U128,
    I8,
    I16,
    I32,
    I64,
    I128,
    Nat,
    Dec,
}
impl Display for NumWidth {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use NumWidth::*;
f.write_str(match self {
U8 => "u8",
U16 => "u16",
U32 => "u32",
U64 => "u64",
U128 => "u128",
I8 => "i8",
I16 => "i16",
I32 => "i32",
I64 => "i64",
I128 => "i128",
Nat => "nat",
Dec => "dec",
})
}
}
/// The width names that can be attached to a float literal as a suffix.
///
/// Each variant renders through its `Display` impl as the lowercase form of
/// its name (`f32`, `f64`).
///
/// The enum is fieldless, so it derives `Eq` and `Hash` in addition to
/// `PartialEq`; this lets widths be used as set members and map keys.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum FloatWidth {
    F32,
    F64,
}
impl Display for FloatWidth {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use FloatWidth::*;
f.write_str(match self {
F32 => "f32",
F64 => "f64",
})
}
}
/// Describes a bound on the width of a numeric literal.
///
/// `W` is the type used to name a width. No trait bound is placed on `W`
/// here: the derived impls below each add exactly the bound they need
/// (`W: Copy` for `Copy`, `W: PartialEq` for `PartialEq`, ...), so repeating
/// `W: Copy` on the type itself would only restrict which payloads are
/// allowed without enabling anything.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum NumericBound<W> {
    /// There is no bound on the width.
    None,
    /// Must have exactly the width `W`.
    Exact(W),
}
/// A parsed expression. This uses lifetimes extensively for two reasons:
///
/// 1. It uses Bump::alloc for all allocations, which returns a reference.
@ -138,12 +202,13 @@ pub enum StrLiteral<'a> {
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Expr<'a> {
// Number Literals
Float(&'a str),
Num(&'a str),
Float(&'a str, NumericBound<FloatWidth>),
Num(&'a str, NumericBound<NumWidth>),
NonBase10Int {
string: &'a str,
base: Base,
is_negative: bool,
bound: NumericBound<NumWidth>,
},
// String Literals
@ -431,13 +496,14 @@ pub enum Pattern<'a> {
OptionalField(&'a str, &'a Loc<Expr<'a>>),
// Literal
NumLiteral(&'a str),
NumLiteral(&'a str, NumericBound<NumWidth>),
NonBase10Literal {
string: &'a str,
base: Base,
is_negative: bool,
bound: NumericBound<NumWidth>,
},
FloatLiteral(&'a str),
FloatLiteral(&'a str, NumericBound<FloatWidth>),
StrLiteral(StrLiteral<'a>),
Underscore(&'a str),
@ -540,20 +606,27 @@ impl<'a> Pattern<'a> {
x == y
}
// Literal
(NumLiteral(x), NumLiteral(y)) => x == y,
(NumLiteral(x, bound_x), NumLiteral(y, bound_y)) => x == y && bound_x == bound_y,
(
NonBase10Literal {
string: string_x,
base: base_x,
is_negative: is_negative_x,
bound: bound_x,
},
NonBase10Literal {
string: string_y,
base: base_y,
is_negative: is_negative_y,
bound: bound_y,
},
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
(FloatLiteral(x), FloatLiteral(y)) => x == y,
) => {
string_x == string_y
&& base_x == base_y
&& is_negative_x == is_negative_y
&& bound_x == bound_y
}
(FloatLiteral(x, bound_x), FloatLiteral(y, bound_y)) => x == y && bound_x == bound_y,
(StrLiteral(x), StrLiteral(y)) => x == y,
(Underscore(x), Underscore(y)) => x == y,

View file

@ -1,6 +1,6 @@
use crate::ast::{
AliasHeader, AssignedField, Collection, CommentOrNewline, Def, Expr, ExtractSpaces, Pattern,
Spaceable, TypeAnnotation,
AliasHeader, AssignedField, Collection, CommentOrNewline, Def, Expr, ExtractSpaces,
NumericBound, Pattern, Spaceable, TypeAnnotation,
};
use crate::blankspace::{space0_after_e, space0_around_ee, space0_before_e, space0_e};
use crate::ident::{lowercase_ident, parse_ident, Ident};
@ -377,7 +377,7 @@ impl<'a> ExprState<'a> {
} else {
let region = self.expr.region;
let mut value = Expr::Num("");
let mut value = Expr::Num("", NumericBound::None);
std::mem::swap(&mut self.expr.value, &mut value);
self.expr = arena
@ -515,28 +515,30 @@ fn numeric_negate_expression<'a, T>(
let region = Region::new(start, expr.region.end());
let new_expr = match &expr.value {
Expr::Num(string) => {
&Expr::Num(string, bound) => {
let new_string =
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
Expr::Num(new_string)
Expr::Num(new_string, bound)
}
Expr::Float(string) => {
&Expr::Float(string, bound) => {
let new_string =
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
Expr::Float(new_string)
Expr::Float(new_string, bound)
}
Expr::NonBase10Int {
&Expr::NonBase10Int {
string,
base,
is_negative,
bound,
} => {
// don't include the minus sign here; it will not be parsed right
Expr::NonBase10Int {
is_negative: !is_negative,
string,
base: *base,
base,
bound,
}
}
_ => Expr::UnaryOp(arena.alloc(expr), Loc::at(loc_op.region, UnaryOp::Negate)),
@ -1450,16 +1452,18 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
Ok(Pattern::RecordDestructure(patterns))
}
Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
Expr::Num(string) => Ok(Pattern::NumLiteral(string)),
&Expr::Float(string, bound) => Ok(Pattern::FloatLiteral(string, bound)),
&Expr::Num(string, bound) => Ok(Pattern::NumLiteral(string, bound)),
Expr::NonBase10Int {
string,
base,
is_negative,
bound,
} => Ok(Pattern::NonBase10Literal {
string,
base: *base,
is_negative: *is_negative,
bound: *bound,
}),
// These would not have parsed as patterns
Expr::AccessorFunction(_)
@ -2319,16 +2323,18 @@ fn positive_number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
use crate::number_literal::NumLiteral::*;
match literal {
Num(s) => Expr::Num(s),
Float(s) => Expr::Float(s),
Num(s, bound) => Expr::Num(s, bound),
Float(s, bound) => Expr::Float(s, bound),
NonBase10Int {
string,
base,
is_negative,
bound,
} => Expr::NonBase10Int {
string,
base,
is_negative,
bound,
},
}
}
@ -2340,16 +2346,18 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
use crate::number_literal::NumLiteral::*;
match literal {
Num(s) => Expr::Num(s),
Float(s) => Expr::Float(s),
Num(s, bound) => Expr::Num(s, bound),
Float(s, bound) => Expr::Float(s, bound),
NonBase10Int {
string,
base,
is_negative,
bound,
} => Expr::NonBase10Int {
string,
base,
is_negative,
bound,
},
}
})

View file

@ -1,14 +1,16 @@
use crate::ast::Base;
use crate::ast::{Base, FloatWidth, NumWidth, NumericBound};
use crate::parser::{ENumber, ParseResult, Parser, Progress};
use crate::state::State;
#[derive(Debug, Copy, Clone)]
pub enum NumLiteral<'a> {
Float(&'a str),
Num(&'a str),
Float(&'a str, NumericBound<FloatWidth>),
Num(&'a str, NumericBound<NumWidth>),
NonBase10Int {
string: &'a str,
base: Base,
is_negative: bool,
bound: NumericBound<NumWidth>,
},
}
@ -49,14 +51,104 @@ fn parse_number_base<'a>(
bytes: &'a [u8],
state: State<'a>,
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
match bytes.get(0..2) {
let number = match bytes.get(0..2) {
Some(b"0b") => chomp_number_base(Base::Binary, is_negated, &bytes[2..], state),
Some(b"0o") => chomp_number_base(Base::Octal, is_negated, &bytes[2..], state),
Some(b"0x") => chomp_number_base(Base::Hex, is_negated, &bytes[2..], state),
_ => chomp_number_dec(is_negated, bytes, state),
};
number.and_then(|(_, literal, state)| parse_number_suffix(literal, state))
}
/// Attaches a width bound to an already-chomped number literal.
///
/// The bytes at the front of `state` are checked for a width suffix:
/// float literals are checked with `get_float_suffix`, while base-10 and
/// non-base-10 integer literals are checked with `get_int_suffix`. When a
/// suffix is found the literal is rebuilt with `NumericBound::Exact` and the
/// state is advanced past the suffix; otherwise the literal is rebuilt with
/// `NumericBound::None` and the state is returned untouched. Whatever bound
/// the incoming literal carried is discarded.
///
/// Always returns `Ok` with `Progress::MadeProgress`.
fn parse_number_suffix<'a>(
    literal: NumLiteral<'a>,
    state: State<'a>,
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
    let (literal, state) = match literal {
        NumLiteral::Float(s, _) => {
            if let Some((width, len)) = get_float_suffix(state.bytes()) {
                (
                    NumLiteral::Float(s, NumericBound::Exact(width)),
                    state.advance(len),
                )
            } else {
                (NumLiteral::Float(s, NumericBound::None), state)
            }
        }
        NumLiteral::Num(s, _) => {
            if let Some((width, len)) = get_int_suffix(state.bytes()) {
                (
                    NumLiteral::Num(s, NumericBound::Exact(width)),
                    state.advance(len),
                )
            } else {
                (NumLiteral::Num(s, NumericBound::None), state)
            }
        }
        NumLiteral::NonBase10Int {
            string,
            base,
            is_negative,
            ..
        } => {
            let (bound, state) = if let Some((width, len)) = get_int_suffix(state.bytes()) {
                (NumericBound::Exact(width), state.advance(len))
            } else {
                (NumericBound::None, state)
            };
            (
                NumLiteral::NonBase10Int {
                    string,
                    base,
                    is_negative,
                    bound,
                },
                state,
            )
        }
    };

    Ok((Progress::MadeProgress, literal, state))
}
/// Expands to one check per `suffix, width` pair, tried in the order given.
///
/// A pair matches when `$bytes` starts with `$suffix` and the byte right
/// after the suffix, if there is one, is not an ASCII letter or digit. On the
/// first match the expansion `return`s `Some((width, suffix_len))` from the
/// *enclosing function*; if nothing matches, control falls through to
/// whatever follows the macro invocation.
///
/// Pairs are written one after another with no separator between them.
macro_rules! parse_num_suffix {
    ($bytes:expr, $($suffix:expr, $width:expr)*) => {
        $(
            {
                let suffix_len = $suffix.len();
                // The boundary check only runs once the prefix has matched,
                // so `suffix_len` is never past the end of `$bytes` here.
                if $bytes.starts_with($suffix)
                    && $bytes
                        .get(suffix_len)
                        .map_or(true, |next| !next.is_ascii_alphanumeric())
                {
                    return Some(($width, suffix_len));
                }
            }
        )*
    }
}
/// Recognises an integer width suffix at the start of `bytes`.
///
/// Returns the matching [`NumWidth`] together with the suffix length in
/// bytes, or `None` when `bytes` does not begin with one of the suffixes
/// listed below. The matching rules (including the requirement that the
/// suffix is not directly followed by an ASCII letter or digit) come from
/// `parse_num_suffix!`, which returns from this function on the first hit.
fn get_int_suffix(bytes: &[u8]) -> Option<(NumWidth, usize)> {
    parse_num_suffix! {
        bytes,
        b"u8", NumWidth::U8
        b"u16", NumWidth::U16
        b"u32", NumWidth::U32
        b"u64", NumWidth::U64
        b"u128", NumWidth::U128
        b"i8", NumWidth::I8
        b"i16", NumWidth::I16
        b"i32", NumWidth::I32
        b"i64", NumWidth::I64
        b"i128", NumWidth::I128
        b"nat", NumWidth::Nat
        b"dec", NumWidth::Dec
    }
    // No suffix matched.
    None
}
/// Recognises a float width suffix (`f32` or `f64`) at the start of `bytes`.
///
/// Returns the matching [`FloatWidth`] together with the suffix length in
/// bytes, or `None` when `bytes` does not begin with one of those suffixes.
/// The matching rules (including the requirement that the suffix is not
/// directly followed by an ASCII letter or digit) come from
/// `parse_num_suffix!`, which returns from this function on the first hit.
fn get_float_suffix(bytes: &[u8]) -> Option<(FloatWidth, usize)> {
    parse_num_suffix! {
        bytes,
        b"f32", FloatWidth::F32
        b"f64", FloatWidth::F64
    }
    // No suffix matched.
    None
}
fn chomp_number_base<'a>(
base: Base,
is_negative: bool,
@ -75,6 +167,7 @@ fn chomp_number_base<'a>(
is_negative,
string,
base,
bound: NumericBound::None,
},
new,
))
@ -105,9 +198,9 @@ fn chomp_number_dec<'a>(
Ok((
Progress::MadeProgress,
if is_float {
NumLiteral::Float(string)
NumLiteral::Float(string, NumericBound::None)
} else {
NumLiteral::Num(string)
NumLiteral::Num(string, NumericBound::None)
},
new,
))
@ -144,8 +237,7 @@ fn chomp_number(mut bytes: &[u8]) -> (bool, usize) {
// skip
bytes = &bytes[1..];
}
_ if byte.is_ascii_digit() || byte.is_ascii_alphabetic() => {
// valid digits (alphabetic in hex digits, and the `e` in `12e26` scientific notation
_ if byte.is_ascii_digit() => {
bytes = &bytes[1..];
}
_ => {

View file

@ -138,16 +138,18 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
use crate::number_literal::NumLiteral::*;
match literal {
Num(s) => Pattern::NumLiteral(s),
Float(s) => Pattern::FloatLiteral(s),
Num(s, bound) => Pattern::NumLiteral(s, bound),
Float(s, bound) => Pattern::FloatLiteral(s, bound),
NonBase10Int {
string,
base,
is_negative,
bound,
} => Pattern::NonBase10Literal {
string,
base,
is_negative,
bound,
},
}
}),