mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-03 08:34:33 +00:00
180 lines
6 KiB
Rust
180 lines
6 KiB
Rust
use crate::ast::{Attempting, Base, Expr};
|
|
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
|
|
use std::char;
|
|
use std::str::from_utf8_unchecked;
|
|
|
|
pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
|
move |_arena, state: State<'a>| {
|
|
let bytes = &mut state.bytes.iter();
|
|
|
|
match bytes.next() {
|
|
Some(&first_byte) => {
|
|
// Number literals must start with either an '-' or a digit.
|
|
if first_byte == b'-' || (first_byte as char).is_ascii_digit() {
|
|
parse_number_literal(first_byte as char, bytes, state)
|
|
} else {
|
|
Err(unexpected(1, state, Attempting::NumberLiteral))
|
|
}
|
|
}
|
|
None => Err(unexpected_eof(0, state.attempting, state)),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn parse_number_literal<'a, I>(
|
|
first_ch: char,
|
|
bytes: &mut I,
|
|
state: State<'a>,
|
|
) -> ParseResult<'a, Expr<'a>>
|
|
where
|
|
I: Iterator<Item = &'a u8>,
|
|
{
|
|
use self::LiteralType::*;
|
|
|
|
let mut typ = Num;
|
|
|
|
// We already parsed 1 character (which may have been a minus sign).
|
|
let mut bytes_parsed = 1;
|
|
let mut prev_byte = first_ch as u8;
|
|
let mut has_parsed_digits = first_ch.is_ascii_digit();
|
|
|
|
for &next_byte in bytes {
|
|
let err_unexpected = || {
|
|
Err(unexpected(
|
|
bytes_parsed,
|
|
state.clone(),
|
|
Attempting::NumberLiteral,
|
|
))
|
|
};
|
|
|
|
let is_potentially_non_base10 = || {
|
|
(bytes_parsed == 1 && first_ch == '0')
|
|
|| (bytes_parsed == 2 && first_ch == '-' && prev_byte == b'0')
|
|
};
|
|
|
|
match next_byte as char {
|
|
'.' => {
|
|
if typ == Float {
|
|
// You only get one decimal point!
|
|
return err_unexpected();
|
|
} else {
|
|
typ = Float;
|
|
}
|
|
}
|
|
'x' => {
|
|
if is_potentially_non_base10() {
|
|
typ = Hex;
|
|
} else {
|
|
return err_unexpected();
|
|
}
|
|
}
|
|
'b' if typ == Num => {
|
|
// We have to check for typ == Num because otherwise we get a false
|
|
// positive here when parsing a hex literal that happens to have
|
|
// a 'b' in it, e.g. 0xbbbb
|
|
if is_potentially_non_base10() {
|
|
typ = Binary;
|
|
} else {
|
|
return err_unexpected();
|
|
}
|
|
}
|
|
'o' => {
|
|
if is_potentially_non_base10() {
|
|
typ = Octal;
|
|
} else {
|
|
return err_unexpected();
|
|
}
|
|
}
|
|
'_' => {
|
|
// Underscores are ignored.
|
|
}
|
|
next_ch => {
|
|
if next_ch.is_ascii_digit() {
|
|
has_parsed_digits = true;
|
|
} else {
|
|
if !has_parsed_digits {
|
|
// No digits! We likely parsed a minus sign
|
|
// that's actually a unary negation operator.
|
|
return err_unexpected();
|
|
}
|
|
|
|
// ASCII alphabetic chars (like 'a' and 'f') are
|
|
// allowed in Hex int literals. We verify them in
|
|
// canonicalization, so if there's a problem, we can
|
|
// give a more helpful error (e.g. "the character 'f'
|
|
// is not allowed in Octal literals" or
|
|
// "the character 'g' is outside the range of valid
|
|
// Hex literals") while still allowing the formatter
|
|
// to format them normally.
|
|
if !next_ch.is_ascii_alphabetic() {
|
|
// We hit an invalid number literal character; we're done!
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Since we only consume characters in the ASCII range for number literals,
|
|
// this will always be exactly 1. There's no need to call next_ch.utf8_len().
|
|
bytes_parsed += 1;
|
|
prev_byte = next_byte;
|
|
}
|
|
|
|
// At this point we have a number, and will definitely succeed.
|
|
// If the number is malformed (outside the supported range),
|
|
// we'll succeed with an appropriate Expr which records that.
|
|
match typ {
|
|
Num => Ok((
|
|
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
|
|
// already validated that this range contains only ASCII digits
|
|
Expr::Num(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
|
|
state.advance_without_indenting(bytes_parsed)?,
|
|
)),
|
|
Float => Ok((
|
|
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
|
|
// already validated that this range contains only ASCII digits
|
|
Expr::Float(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
|
|
state.advance_without_indenting(bytes_parsed)?,
|
|
)),
|
|
// For these we trim off the 0x/0o/0b part
|
|
Hex => from_base(Base::Hex, first_ch, bytes_parsed, state),
|
|
Octal => from_base(Base::Octal, first_ch, bytes_parsed, state),
|
|
Binary => from_base(Base::Binary, first_ch, bytes_parsed, state),
|
|
}
|
|
}
|
|
|
|
/// The kind of number literal recognized so far while scanning.
///
/// This is a plain tag with no payload, so it derives `Clone` and `Copy` in
/// addition to the comparison traits and can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LiteralType {
    /// A literal with no decimal point and no base prefix.
    Num,
    /// A literal in which a `.` has been seen.
    Float,
    /// A literal introduced by an `x` marker after the leading zero.
    Hex,
    /// A literal introduced by an `o` marker after the leading zero.
    Octal,
    /// A literal introduced by a `b` marker after the leading zero.
    Binary,
}
|
|
|
|
fn from_base(
|
|
base: Base,
|
|
first_ch: char,
|
|
bytes_parsed: usize,
|
|
state: State<'_>,
|
|
) -> ParseResult<'_, Expr<'_>> {
|
|
let is_negative = first_ch == '-';
|
|
let bytes = if is_negative {
|
|
&state.bytes[3..bytes_parsed]
|
|
} else {
|
|
&state.bytes[2..bytes_parsed]
|
|
};
|
|
|
|
match parse_utf8(bytes) {
|
|
Ok(string) => Ok((
|
|
Expr::NonBase10Int {
|
|
is_negative,
|
|
string,
|
|
base,
|
|
},
|
|
state.advance_without_indenting(bytes_parsed)?,
|
|
)),
|
|
Err(reason) => state.fail(reason),
|
|
}
|
|
}
|