mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 14:54:47 +00:00
Parse and expand numeric bounds in canonicalization pass
This commit is contained in:
parent
17c5fe0bff
commit
a6f7579c07
113 changed files with 472 additions and 1361 deletions
|
@ -5,7 +5,6 @@ use crate::ident::Ident;
|
|||
use bumpalo::collections::{String, Vec};
|
||||
use bumpalo::Bump;
|
||||
use roc_module::called_via::{BinOp, CalledVia, UnaryOp};
|
||||
use roc_module::numeric::{FloatWidth, IntWidth, NumWidth};
|
||||
use roc_region::all::{Loc, Position, Region};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -139,14 +138,12 @@ pub enum StrLiteral<'a> {
|
|||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub enum Expr<'a> {
|
||||
// Number Literals
|
||||
Float(&'a str, NumericBound<FloatWidth>),
|
||||
Num(&'a str, NumericBound<NumWidth>),
|
||||
Int(&'a str, NumericBound<IntWidth>),
|
||||
Float(&'a str),
|
||||
Num(&'a str),
|
||||
NonBase10Int {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bound: NumericBound<IntWidth>,
|
||||
},
|
||||
|
||||
// String Literals
|
||||
|
@ -411,9 +408,6 @@ impl<'a> CommentOrNewline<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// A `NumericBound` with the unit type as a placeholder width variable.
|
||||
pub type NumericBound<W> = roc_module::numeric::NumericBound<W, ()>;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub enum Pattern<'a> {
|
||||
// Identifier
|
||||
|
@ -437,15 +431,13 @@ pub enum Pattern<'a> {
|
|||
OptionalField(&'a str, &'a Loc<Expr<'a>>),
|
||||
|
||||
// Literal
|
||||
NumLiteral(&'a str, NumericBound<NumWidth>),
|
||||
NumLiteral(&'a str),
|
||||
NonBase10Literal {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bound: NumericBound<IntWidth>,
|
||||
},
|
||||
FloatLiteral(&'a str, NumericBound<FloatWidth>),
|
||||
IntLiteral(&'a str, NumericBound<IntWidth>),
|
||||
FloatLiteral(&'a str),
|
||||
StrLiteral(StrLiteral<'a>),
|
||||
Underscore(&'a str),
|
||||
|
||||
|
@ -548,27 +540,20 @@ impl<'a> Pattern<'a> {
|
|||
x == y
|
||||
}
|
||||
// Literal
|
||||
(NumLiteral(x, bound_x), NumLiteral(y, bound_y)) => x == y && bound_x == bound_y,
|
||||
(NumLiteral(x), NumLiteral(y)) => x == y,
|
||||
(
|
||||
NonBase10Literal {
|
||||
string: string_x,
|
||||
base: base_x,
|
||||
is_negative: is_negative_x,
|
||||
bound: bound_x,
|
||||
},
|
||||
NonBase10Literal {
|
||||
string: string_y,
|
||||
base: base_y,
|
||||
is_negative: is_negative_y,
|
||||
bound: bound_y,
|
||||
},
|
||||
) => {
|
||||
string_x == string_y
|
||||
&& base_x == base_y
|
||||
&& is_negative_x == is_negative_y
|
||||
&& bound_x == bound_y
|
||||
}
|
||||
(FloatLiteral(x, bound_x), FloatLiteral(y, bound_y)) => x == y && bound_x == bound_y,
|
||||
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
|
||||
(FloatLiteral(x), FloatLiteral(y)) => x == y,
|
||||
(StrLiteral(x), StrLiteral(y)) => x == y,
|
||||
(Underscore(x), Underscore(y)) => x == y,
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@ use crate::type_annotation;
|
|||
use bumpalo::collections::Vec;
|
||||
use bumpalo::Bump;
|
||||
use roc_module::called_via::{BinOp, CalledVia, UnaryOp};
|
||||
use roc_module::numeric::NumericBound;
|
||||
use roc_region::all::{Loc, Position, Region};
|
||||
|
||||
use crate::parser::Progress::{self, *};
|
||||
|
@ -378,7 +377,7 @@ impl<'a> ExprState<'a> {
|
|||
} else {
|
||||
let region = self.expr.region;
|
||||
|
||||
let mut value = Expr::Num("", NumericBound::None { width_variable: () });
|
||||
let mut value = Expr::Num("");
|
||||
std::mem::swap(&mut self.expr.value, &mut value);
|
||||
|
||||
self.expr = arena
|
||||
|
@ -516,30 +515,28 @@ fn numeric_negate_expression<'a, T>(
|
|||
let region = Region::new(start, expr.region.end());
|
||||
|
||||
let new_expr = match expr.value {
|
||||
Expr::Num(string, bound) => {
|
||||
Expr::Num(string) => {
|
||||
let new_string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
|
||||
|
||||
Expr::Num(new_string, bound)
|
||||
Expr::Num(new_string)
|
||||
}
|
||||
Expr::Float(string, bound) => {
|
||||
Expr::Float(string) => {
|
||||
let new_string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
|
||||
|
||||
Expr::Float(new_string, bound)
|
||||
Expr::Float(new_string)
|
||||
}
|
||||
Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => {
|
||||
// don't include the minus sign here; it will not be parsed right
|
||||
Expr::NonBase10Int {
|
||||
is_negative: !is_negative,
|
||||
string,
|
||||
base,
|
||||
bound,
|
||||
}
|
||||
}
|
||||
_ => Expr::UnaryOp(arena.alloc(expr), Loc::at(loc_op.region, UnaryOp::Negate)),
|
||||
|
@ -1453,19 +1450,16 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<
|
|||
Ok(Pattern::RecordDestructure(patterns))
|
||||
}
|
||||
|
||||
&Expr::Float(string, bound) => Ok(Pattern::FloatLiteral(string, bound)),
|
||||
&Expr::Num(string, bound) => Ok(Pattern::NumLiteral(string, bound)),
|
||||
&Expr::Int(string, bound) => Ok(Pattern::IntLiteral(string, bound)),
|
||||
&Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
|
||||
&Expr::Num(string) => Ok(Pattern::NumLiteral(string)),
|
||||
Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Ok(Pattern::NonBase10Literal {
|
||||
string,
|
||||
base: *base,
|
||||
is_negative: *is_negative,
|
||||
bound: *bound,
|
||||
}),
|
||||
// These would not have parsed as patterns
|
||||
Expr::AccessorFunction(_)
|
||||
|
@ -2325,19 +2319,16 @@ fn positive_number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
|
|||
use crate::number_literal::NumLiteral::*;
|
||||
|
||||
match literal {
|
||||
Num(s, bound) => Expr::Num(s, bound),
|
||||
Float(s, bound) => Expr::Float(s, bound),
|
||||
Int(s, bound) => Expr::Int(s, bound),
|
||||
Num(s) => Expr::Num(s),
|
||||
Float(s) => Expr::Float(s),
|
||||
NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -2349,19 +2340,16 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
|
|||
use crate::number_literal::NumLiteral::*;
|
||||
|
||||
match literal {
|
||||
Num(s, bound) => Expr::Num(s, bound),
|
||||
Float(s, bound) => Expr::Float(s, bound),
|
||||
Int(s, bound) => Expr::Int(s, bound),
|
||||
Num(s) => Expr::Num(s),
|
||||
Float(s) => Expr::Float(s),
|
||||
NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
}
|
||||
})
|
||||
|
|
|
@ -1,18 +1,14 @@
|
|||
use crate::ast::{Base, NumericBound};
|
||||
use crate::ast::Base;
|
||||
use crate::parser::{ENumber, ParseResult, Parser, Progress};
|
||||
use crate::state::State;
|
||||
use roc_module::numeric::{FloatWidth, IntWidth, NumWidth};
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub enum NumLiteral<'a> {
|
||||
Float(&'a str, NumericBound<FloatWidth>),
|
||||
Int(&'a str, NumericBound<IntWidth>),
|
||||
Num(&'a str, NumericBound<NumWidth>),
|
||||
Float(&'a str),
|
||||
Num(&'a str),
|
||||
NonBase10Int {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bound: NumericBound<IntWidth>,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -61,101 +57,27 @@ fn parse_number_base<'a>(
|
|||
}
|
||||
}
|
||||
|
||||
macro_rules! parse_num_suffix {
|
||||
($bytes:expr, $($suffix:expr, $width:expr)*) => {
|
||||
$(
|
||||
{
|
||||
let len = $suffix.len();
|
||||
if $bytes.starts_with($suffix)
|
||||
&& {
|
||||
let next = $bytes[len..].get(0);
|
||||
match next { Some(c) => !(c.is_ascii_digit() || c.is_ascii_alphabetic()), None => true, }
|
||||
}
|
||||
{
|
||||
return Some(($width, len))
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
fn get_int_suffix(bytes: &[u8]) -> Option<(IntWidth, usize)> {
|
||||
parse_num_suffix! {
|
||||
bytes,
|
||||
b"u8", IntWidth::U8
|
||||
b"u16", IntWidth::U16
|
||||
b"u32", IntWidth::U32
|
||||
b"u64", IntWidth::U64
|
||||
b"u128", IntWidth::U128
|
||||
b"i8", IntWidth::I8
|
||||
b"i16", IntWidth::I16
|
||||
b"i32", IntWidth::I32
|
||||
b"i64", IntWidth::I64
|
||||
b"i128", IntWidth::I128
|
||||
b"nat", IntWidth::Nat
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn get_float_suffix(bytes: &[u8]) -> Option<(FloatWidth, usize)> {
|
||||
parse_num_suffix! {
|
||||
bytes,
|
||||
b"dec", FloatWidth::Dec
|
||||
b"f32", FloatWidth::F32
|
||||
b"f64", FloatWidth::F64
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn get_num_suffix(bytes: &[u8]) -> Option<(NumWidth, usize)> {
|
||||
(get_int_suffix(bytes).map(|(iw, l)| (NumWidth::Int(iw), l)))
|
||||
.or_else(|| get_float_suffix(bytes).map(|(fw, l)| (NumWidth::Float(fw), l)))
|
||||
}
|
||||
|
||||
fn chomp_number_base<'a>(
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
bytes: &'a [u8],
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
|
||||
let (_, (_is_float, bound, chomped), state) =
|
||||
chomp_number(bytes, state, is_negative, base == Base::Hex)?;
|
||||
let (_is_float, chomped) = chomp_number(bytes);
|
||||
|
||||
let (bound, chomped_number) = if let Some((bound, chomped_before_suffix)) = bound {
|
||||
(Some(bound), chomped_before_suffix)
|
||||
} else {
|
||||
(None, chomped)
|
||||
};
|
||||
|
||||
let string = unsafe { std::str::from_utf8_unchecked(&bytes[..chomped_number]) };
|
||||
let string = unsafe { std::str::from_utf8_unchecked(&bytes[..chomped]) };
|
||||
|
||||
let new = state.advance(chomped + 2 + is_negative as usize);
|
||||
|
||||
match bound {
|
||||
None => Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::NonBase10Int {
|
||||
is_negative,
|
||||
string,
|
||||
base,
|
||||
bound: NumericBound::None { width_variable: () },
|
||||
},
|
||||
new,
|
||||
)),
|
||||
Some(NumWidth::Int(iw)) => Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::NonBase10Int {
|
||||
is_negative,
|
||||
string,
|
||||
base,
|
||||
bound: NumericBound::Exact(iw),
|
||||
},
|
||||
new,
|
||||
)),
|
||||
Some(NumWidth::Float(_)) => {
|
||||
Err((Progress::MadeProgress, ENumber::IntHasFloatSuffix, state))
|
||||
}
|
||||
}
|
||||
Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::NonBase10Int {
|
||||
is_negative,
|
||||
string,
|
||||
base,
|
||||
},
|
||||
new,
|
||||
))
|
||||
}
|
||||
|
||||
fn chomp_number_dec<'a>(
|
||||
|
@ -163,62 +85,37 @@ fn chomp_number_dec<'a>(
|
|||
bytes: &'a [u8],
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, NumLiteral<'a>, ENumber> {
|
||||
let (_, (is_float, bound, chomped), state) = chomp_number(bytes, state, is_negative, false)?;
|
||||
let (is_float, chomped) = chomp_number(bytes);
|
||||
|
||||
if is_negative && chomped == 0 {
|
||||
// we're probably actually looking at unary negation here
|
||||
return Err((Progress::NoProgress, ENumber::End, state));
|
||||
}
|
||||
|
||||
if !bytes.get(0).copied().unwrap_or_default().is_ascii_digit() {
|
||||
// we're probably actually looking at unary negation here
|
||||
return Err((Progress::NoProgress, ENumber::End, state));
|
||||
}
|
||||
|
||||
let (bound, chomped_number) = if let Some((bound, chomped_before_suffix)) = bound {
|
||||
(Some(bound), chomped_before_suffix)
|
||||
} else {
|
||||
(None, chomped)
|
||||
};
|
||||
|
||||
let string = unsafe {
|
||||
std::str::from_utf8_unchecked(&state.bytes()[0..chomped_number + is_negative as usize])
|
||||
};
|
||||
let string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[0..chomped + is_negative as usize]) };
|
||||
|
||||
let new = state.advance(chomped + is_negative as usize);
|
||||
|
||||
match (is_float, bound) {
|
||||
(true, None) => Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::Float(string, NumericBound::None { width_variable: () }),
|
||||
new,
|
||||
)),
|
||||
(false, None) => Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::Num(string, NumericBound::None { width_variable: () }),
|
||||
new,
|
||||
)),
|
||||
(_, Some(NumWidth::Float(fw))) => Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::Float(string, NumericBound::Exact(fw)),
|
||||
new,
|
||||
)),
|
||||
(false, Some(NumWidth::Int(iw))) => Ok((
|
||||
Progress::MadeProgress,
|
||||
NumLiteral::Int(string, NumericBound::Exact(iw)),
|
||||
new,
|
||||
)),
|
||||
(true, Some(NumWidth::Int(_))) => {
|
||||
Err((Progress::MadeProgress, ENumber::FloatHasIntSuffix, state))
|
||||
}
|
||||
}
|
||||
Ok((
|
||||
Progress::MadeProgress,
|
||||
if is_float {
|
||||
NumLiteral::Float(string)
|
||||
} else {
|
||||
NumLiteral::Num(string)
|
||||
},
|
||||
new,
|
||||
))
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn chomp_number<'a>(
|
||||
mut bytes: &'a [u8],
|
||||
state: State<'a>,
|
||||
is_negative: bool,
|
||||
hex: bool,
|
||||
) -> ParseResult<'a, (bool, Option<(NumWidth, usize)>, usize), ENumber> {
|
||||
fn chomp_number(mut bytes: &[u8]) -> (bool, usize) {
|
||||
let start_bytes_len = bytes.len();
|
||||
let mut is_float = false;
|
||||
let mut suffix_and_chomped_before = None;
|
||||
|
||||
while let Some(byte) = bytes.get(0) {
|
||||
match byte {
|
||||
|
@ -247,69 +144,18 @@ fn chomp_number<'a>(
|
|||
// skip
|
||||
bytes = &bytes[1..];
|
||||
}
|
||||
_ if byte.is_ascii_digit() => {
|
||||
_ if byte.is_ascii_digit() || byte.is_ascii_alphabetic() => {
|
||||
// valid digits (alphabetic in hex digits, and the `e` in `12e26` scientific notation
|
||||
bytes = &bytes[1..];
|
||||
}
|
||||
_ if byte.is_ascii_hexdigit() && hex => {
|
||||
bytes = &bytes[1..];
|
||||
}
|
||||
_ if byte.is_ascii_whitespace() || byte.is_ascii_punctuation() => {
|
||||
// not a valid digit; we're done
|
||||
return Ok((
|
||||
Progress::MadeProgress,
|
||||
(
|
||||
is_float,
|
||||
suffix_and_chomped_before,
|
||||
start_bytes_len - bytes.len(),
|
||||
),
|
||||
state,
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
// This might be a suffix; try that first.
|
||||
let parsed_suffix = if suffix_and_chomped_before.is_none() {
|
||||
get_num_suffix(bytes)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if let Some((bound, advanced_by)) = parsed_suffix {
|
||||
suffix_and_chomped_before = Some((bound, start_bytes_len - bytes.len()));
|
||||
bytes = &bytes[advanced_by..];
|
||||
continue;
|
||||
}
|
||||
|
||||
// Okay, this number is invalid.
|
||||
|
||||
if start_bytes_len - bytes.len() == 0 && is_negative {
|
||||
// We're probably actually looking at unary negation here. Reset the progress.
|
||||
return Err((Progress::NoProgress, ENumber::End, state));
|
||||
}
|
||||
|
||||
if bytes
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.is_ascii_alphabetic()
|
||||
{
|
||||
// The user likely mistyped a literal suffix type here.
|
||||
return Err((
|
||||
Progress::MadeProgress,
|
||||
ENumber::LiteralSuffix,
|
||||
state.advance(start_bytes_len - bytes.len()),
|
||||
));
|
||||
}
|
||||
|
||||
return Err((Progress::MadeProgress, ENumber::End, state));
|
||||
// not a valid digit; we're done
|
||||
return (is_float, start_bytes_len - bytes.len());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the above loop exits, we must be dealing with an empty slice
|
||||
// therefore we parsed all of the bytes in the input
|
||||
Ok((
|
||||
Progress::MadeProgress,
|
||||
(is_float, suffix_and_chomped_before, start_bytes_len),
|
||||
state,
|
||||
))
|
||||
(is_float, start_bytes_len)
|
||||
}
|
||||
|
|
|
@ -338,9 +338,6 @@ pub enum EExpr<'a> {
|
|||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ENumber {
|
||||
End,
|
||||
LiteralSuffix,
|
||||
IntHasFloatSuffix,
|
||||
FloatHasIntSuffix,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
|
|
@ -138,19 +138,16 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
|||
use crate::number_literal::NumLiteral::*;
|
||||
|
||||
match literal {
|
||||
Num(s, bound) => Pattern::NumLiteral(s, bound),
|
||||
Float(s, bound) => Pattern::FloatLiteral(s, bound),
|
||||
Int(s, bound) => Pattern::IntLiteral(s, bound),
|
||||
Num(s) => Pattern::NumLiteral(s),
|
||||
Float(s) => Pattern::FloatLiteral(s),
|
||||
NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
} => Pattern::NonBase10Literal {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
bound,
|
||||
},
|
||||
}
|
||||
}),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue