Parse and expand numeric bounds in canonicalization pass

This commit is contained in:
ayazhafiz 2022-02-01 22:48:29 -05:00
parent 17c5fe0bff
commit a6f7579c07
113 changed files with 472 additions and 1361 deletions

View file

@ -5,7 +5,6 @@ use crate::ident::Ident;
use bumpalo::collections::{String, Vec};
use bumpalo::Bump;
use roc_module::called_via::{BinOp, CalledVia, UnaryOp};
use roc_module::numeric::{FloatWidth, IntWidth, NumWidth};
use roc_region::all::{Loc, Position, Region};
#[derive(Debug)]
@ -139,14 +138,12 @@ pub enum StrLiteral<'a> {
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Expr<'a> {
// Number Literals
Float(&'a str, NumericBound<FloatWidth>),
Num(&'a str, NumericBound<NumWidth>),
Int(&'a str, NumericBound<IntWidth>),
Float(&'a str),
Num(&'a str),
NonBase10Int {
string: &'a str,
base: Base,
is_negative: bool,
bound: NumericBound<IntWidth>,
},
// String Literals
@ -411,9 +408,6 @@ impl<'a> CommentOrNewline<'a> {
}
}
/// A `NumericBound` with the unit type as a placeholder width variable.
pub type NumericBound<W> = roc_module::numeric::NumericBound<W, ()>;
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Pattern<'a> {
// Identifier
@ -437,15 +431,13 @@ pub enum Pattern<'a> {
OptionalField(&'a str, &'a Loc<Expr<'a>>),
// Literal
NumLiteral(&'a str, NumericBound<NumWidth>),
NumLiteral(&'a str),
NonBase10Literal {
string: &'a str,
base: Base,
is_negative: bool,
bound: NumericBound<IntWidth>,
},
FloatLiteral(&'a str, NumericBound<FloatWidth>),
IntLiteral(&'a str, NumericBound<IntWidth>),
FloatLiteral(&'a str),
StrLiteral(StrLiteral<'a>),
Underscore(&'a str),
@ -548,27 +540,20 @@ impl<'a> Pattern<'a> {
x == y
}
// Literal
(NumLiteral(x, bound_x), NumLiteral(y, bound_y)) => x == y && bound_x == bound_y,
(NumLiteral(x), NumLiteral(y)) => x == y,
(
NonBase10Literal {
string: string_x,
base: base_x,
is_negative: is_negative_x,
bound: bound_x,
},
NonBase10Literal {
string: string_y,
base: base_y,
is_negative: is_negative_y,
bound: bound_y,
},
) => {
string_x == string_y
&& base_x == base_y
&& is_negative_x == is_negative_y
&& bound_x == bound_y
}
(FloatLiteral(x, bound_x), FloatLiteral(y, bound_y)) => x == y && bound_x == bound_y,
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
(FloatLiteral(x), FloatLiteral(y)) => x == y,
(StrLiteral(x), StrLiteral(y)) => x == y,
(Underscore(x), Underscore(y)) => x == y,

View file

@ -16,7 +16,6 @@ use crate::type_annotation;
use bumpalo::collections::Vec;
use bumpalo::Bump;
use roc_module::called_via::{BinOp, CalledVia, UnaryOp};
use roc_module::numeric::NumericBound;
use roc_region::all::{Loc, Position, Region};
use crate::parser::Progress::{self, *};
@ -378,7 +377,7 @@ impl<'a> ExprState<'a> {
} else {
let region = self.expr.region;
let mut value = Expr::Num("", NumericBound::None { width_variable: () });
let mut value = Expr::Num("");
std::mem::swap(&mut self.expr.value, &mut value);
self.expr = arena
@ -516,30 +515,28 @@ fn numeric_negate_expression<'a, T>(
let region = Region::new(start, expr.region.end());
let new_expr = match expr.value {
Expr::Num(string, bound) => {
Expr::Num(string) => {
let new_string =
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
Expr::Num(new_string, bound)
Expr::Num(new_string)
}
Expr::Float(string, bound) => {
Expr::Float(string) => {
let new_string =
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
Expr::Float(new_string, bound)
Expr::Float(new_string)
}
Expr::NonBase10Int {
string,
base,
is_negative,
bound,
} => {
// don't include the minus sign here; it will not be parsed right
Expr::NonBase10Int {
is_negative: !is_negative,
string,
base,
bound,
}
}
_ => Expr::UnaryOp(arena.alloc(expr), Loc::at(loc_op.region, UnaryOp::Negate)),
@ -1453,19 +1450,16 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
Ok(Pattern::RecordDestructure(patterns))
}
&Expr::Float(string, bound) => Ok(Pattern::FloatLiteral(string, bound)),
&Expr::Num(string, bound) => Ok(Pattern::NumLiteral(string, bound)),
&Expr::Int(string, bound) => Ok(Pattern::IntLiteral(string, bound)),
&Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
&Expr::Num(string) => Ok(Pattern::NumLiteral(string)),
Expr::NonBase10Int {
string,
base,
is_negative,
bound,
} => Ok(Pattern::NonBase10Literal {
string,
base: *base,
is_negative: *is_negative,
bound: *bound,
}),
// These would not have parsed as patterns
Expr::AccessorFunction(_)
@ -2325,19 +2319,16 @@ fn positive_number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
use crate::number_literal::NumLiteral::*;
match literal {
Num(s, bound) => Expr::Num(s, bound),
Float(s, bound) => Expr::Float(s, bound),
Int(s, bound) => Expr::Int(s, bound),
Num(s) => Expr::Num(s),
Float(s) => Expr::Float(s),
NonBase10Int {
string,
base,
is_negative,
bound,
} => Expr::NonBase10Int {
string,
base,
is_negative,
bound,
},
}
}
@ -2349,19 +2340,16 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
use crate::number_literal::NumLiteral::*;
match literal {
Num(s, bound) => Expr::Num(s, bound),
Float(s, bound) => Expr::Float(s, bound),
Int(s, bound) => Expr::Int(s, bound),
Num(s) => Expr::Num(s),
Float(s) => Expr::Float(s),
NonBase10Int {
string,
base,
is_negative,
bound,
} => Expr::NonBase10Int {
string,
base,
is_negative,
bound,
},
}
})

View file

@ -1,18 +1,14 @@
use crate::ast::{Base, NumericBound};
use crate::ast::Base;
use crate::parser::{ENumber, ParseResult, Parser, Progress};
use crate::state::State;
use roc_module::numeric::{FloatWidth, IntWidth, NumWidth};
#[derive(Debug, Copy, Clone)]
pub enum NumLiteral<'a> {
Float(&'a str, NumericBound<FloatWidth>),
Int(&'a str, NumericBound<IntWidth>),
Num(&'a str, NumericBound<NumWidth>),
Float(&'a str),
Num(&'a str),
NonBase10Int {
string: &'a str,
base: Base,
is_negative: bool,
bound: NumericBound<IntWidth>,
},
}
@ -61,101 +57,27 @@ fn parse_number_base<'a>(
}
}
macro_rules! parse_num_suffix {
($bytes:expr, $($suffix:expr, $width:expr)*) => {
$(
{
let len = $suffix.len();
if $bytes.starts_with($suffix)
&& {
let next = $bytes[len..].get(0);
match next { Some(c) => !(c.is_ascii_digit() || c.is_ascii_alphabetic()), None => true, }
}
{
return Some(($width, len))
}
}
)*
}
}
fn get_int_suffix(bytes: &[u8]) -> Option<(IntWidth, usize)> {
parse_num_suffix! {
bytes,
b"u8", IntWidth::U8
b"u16", IntWidth::U16
b"u32", IntWidth::U32
b"u64", IntWidth::U64
b"u128", IntWidth::U128
b"i8", IntWidth::I8
b"i16", IntWidth::I16
b"i32", IntWidth::I32
b"i64", IntWidth::I64
b"i128", IntWidth::I128
b"nat", IntWidth::Nat
}
None
}
fn get_float_suffix(bytes: &[u8]) -> Option<(FloatWidth, usize)> {
parse_num_suffix! {
bytes,
b"dec", FloatWidth::Dec
b"f32", FloatWidth::F32
b"f64", FloatWidth::F64
}
None
}
fn get_num_suffix(bytes: &[u8]) -> Option<(NumWidth, usize)> {
(get_int_suffix(bytes).map(|(iw, l)| (NumWidth::Int(iw), l)))
.or_else(|| get_float_suffix(bytes).map(|(fw, l)| (NumWidth::Float(fw), l)))
}
fn chomp_number_base<'a>(
base: Base,
is_negative: bool,
bytes: &'a [u8],
state: State<'a>,
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
let (_, (_is_float, bound, chomped), state) =
chomp_number(bytes, state, is_negative, base == Base::Hex)?;
let (_is_float, chomped) = chomp_number(bytes);
let (bound, chomped_number) = if let Some((bound, chomped_before_suffix)) = bound {
(Some(bound), chomped_before_suffix)
} else {
(None, chomped)
};
let string = unsafe { std::str::from_utf8_unchecked(&bytes[..chomped_number]) };
let string = unsafe { std::str::from_utf8_unchecked(&bytes[..chomped]) };
let new = state.advance(chomped + 2 + is_negative as usize);
match bound {
None => Ok((
Progress::MadeProgress,
NumLiteral::NonBase10Int {
is_negative,
string,
base,
bound: NumericBound::None { width_variable: () },
},
new,
)),
Some(NumWidth::Int(iw)) => Ok((
Progress::MadeProgress,
NumLiteral::NonBase10Int {
is_negative,
string,
base,
bound: NumericBound::Exact(iw),
},
new,
)),
Some(NumWidth::Float(_)) => {
Err((Progress::MadeProgress, ENumber::IntHasFloatSuffix, state))
}
}
Ok((
Progress::MadeProgress,
NumLiteral::NonBase10Int {
is_negative,
string,
base,
},
new,
))
}
fn chomp_number_dec<'a>(
@ -163,62 +85,37 @@ fn chomp_number_dec<'a>(
bytes: &'a [u8],
state: State<'a>,
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
let (_, (is_float, bound, chomped), state) = chomp_number(bytes, state, is_negative, false)?;
let (is_float, chomped) = chomp_number(bytes);
if is_negative && chomped == 0 {
// we're probably actually looking at unary negation here
return Err((Progress::NoProgress, ENumber::End, state));
}
if !bytes.get(0).copied().unwrap_or_default().is_ascii_digit() {
// we're probably actually looking at unary negation here
return Err((Progress::NoProgress, ENumber::End, state));
}
let (bound, chomped_number) = if let Some((bound, chomped_before_suffix)) = bound {
(Some(bound), chomped_before_suffix)
} else {
(None, chomped)
};
let string = unsafe {
std::str::from_utf8_unchecked(&state.bytes()[0..chomped_number + is_negative as usize])
};
let string =
unsafe { std::str::from_utf8_unchecked(&state.bytes()[0..chomped + is_negative as usize]) };
let new = state.advance(chomped + is_negative as usize);
match (is_float, bound) {
(true, None) => Ok((
Progress::MadeProgress,
NumLiteral::Float(string, NumericBound::None { width_variable: () }),
new,
)),
(false, None) => Ok((
Progress::MadeProgress,
NumLiteral::Num(string, NumericBound::None { width_variable: () }),
new,
)),
(_, Some(NumWidth::Float(fw))) => Ok((
Progress::MadeProgress,
NumLiteral::Float(string, NumericBound::Exact(fw)),
new,
)),
(false, Some(NumWidth::Int(iw))) => Ok((
Progress::MadeProgress,
NumLiteral::Int(string, NumericBound::Exact(iw)),
new,
)),
(true, Some(NumWidth::Int(_))) => {
Err((Progress::MadeProgress, ENumber::FloatHasIntSuffix, state))
}
}
Ok((
Progress::MadeProgress,
if is_float {
NumLiteral::Float(string)
} else {
NumLiteral::Num(string)
},
new,
))
}
#[allow(clippy::type_complexity)]
fn chomp_number<'a>(
mut bytes: &'a [u8],
state: State<'a>,
is_negative: bool,
hex: bool,
) -> ParseResult<'a, (bool, Option<(NumWidth, usize)>, usize), ENumber> {
fn chomp_number(mut bytes: &[u8]) -> (bool, usize) {
let start_bytes_len = bytes.len();
let mut is_float = false;
let mut suffix_and_chomped_before = None;
while let Some(byte) = bytes.get(0) {
match byte {
@ -247,69 +144,18 @@ fn chomp_number<'a>(
// skip
bytes = &bytes[1..];
}
_ if byte.is_ascii_digit() => {
_ if byte.is_ascii_digit() || byte.is_ascii_alphabetic() => {
// valid digits (alphabetic characters appear in hex digits, and as the `e` in `12e26` scientific notation)
bytes = &bytes[1..];
}
_ if byte.is_ascii_hexdigit() && hex => {
bytes = &bytes[1..];
}
_ if byte.is_ascii_whitespace() || byte.is_ascii_punctuation() => {
// not a valid digit; we're done
return Ok((
Progress::MadeProgress,
(
is_float,
suffix_and_chomped_before,
start_bytes_len - bytes.len(),
),
state,
));
}
_ => {
// This might be a suffix; try that first.
let parsed_suffix = if suffix_and_chomped_before.is_none() {
get_num_suffix(bytes)
} else {
None
};
if let Some((bound, advanced_by)) = parsed_suffix {
suffix_and_chomped_before = Some((bound, start_bytes_len - bytes.len()));
bytes = &bytes[advanced_by..];
continue;
}
// Okay, this number is invalid.
if start_bytes_len - bytes.len() == 0 && is_negative {
// We're probably actually looking at unary negation here. Reset the progress.
return Err((Progress::NoProgress, ENumber::End, state));
}
if bytes
.get(0)
.copied()
.unwrap_or_default()
.is_ascii_alphabetic()
{
// The user likely mistyped a literal suffix type here.
return Err((
Progress::MadeProgress,
ENumber::LiteralSuffix,
state.advance(start_bytes_len - bytes.len()),
));
}
return Err((Progress::MadeProgress, ENumber::End, state));
// not a valid digit; we're done
return (is_float, start_bytes_len - bytes.len());
}
}
}
// if the above loop exits, we must be dealing with an empty slice
// therefore we parsed all of the bytes in the input
Ok((
Progress::MadeProgress,
(is_float, suffix_and_chomped_before, start_bytes_len),
state,
))
(is_float, start_bytes_len)
}

View file

@ -338,9 +338,6 @@ pub enum EExpr<'a> {
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ENumber {
End,
LiteralSuffix,
IntHasFloatSuffix,
FloatHasIntSuffix,
}
#[derive(Debug, Clone, PartialEq, Eq)]

View file

@ -138,19 +138,16 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
use crate::number_literal::NumLiteral::*;
match literal {
Num(s, bound) => Pattern::NumLiteral(s, bound),
Float(s, bound) => Pattern::FloatLiteral(s, bound),
Int(s, bound) => Pattern::IntLiteral(s, bound),
Num(s) => Pattern::NumLiteral(s),
Float(s) => Pattern::FloatLiteral(s),
NonBase10Int {
string,
base,
is_negative,
bound,
} => Pattern::NonBase10Literal {
string,
base,
is_negative,
bound,
},
}
}),