mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-03 08:34:33 +00:00
Fix parsing hex/octal/binary literals
This commit is contained in:
parent
37422c6a76
commit
2710dcb95c
8 changed files with 163 additions and 115 deletions
|
@ -3,6 +3,7 @@ use crate::can::expr::Expr;
|
|||
use crate::can::problem::Problem;
|
||||
use crate::can::problem::RuntimeError::*;
|
||||
use crate::constrain;
|
||||
use crate::parse::ast::Base;
|
||||
use crate::region::Region;
|
||||
use crate::subs::VarStore;
|
||||
use crate::types::Constraint::{self, *};
|
||||
|
@ -63,21 +64,15 @@ pub fn finish_parsing_int(raw: &str) -> Result<i64, &str> {
|
|||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn finish_parsing_hex(raw: &str) -> Result<i64, &str> {
|
||||
// Ignore underscores.
|
||||
i64::from_str_radix(raw.replace("_", "").as_str(), 16).map_err(|_| raw)
|
||||
}
|
||||
pub fn finish_parsing_base(raw: &str, base: Base) -> Result<i64, &str> {
|
||||
let radix = match base {
|
||||
Base::Hex => 16,
|
||||
Base::Octal => 8,
|
||||
Base::Binary => 2,
|
||||
};
|
||||
|
||||
#[inline(always)]
|
||||
pub fn finish_parsing_oct(raw: &str) -> Result<i64, &str> {
|
||||
// Ignore underscores.
|
||||
i64::from_str_radix(raw.replace("_", "").as_str(), 8).map_err(|_| raw)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn finish_parsing_bin(raw: &str) -> Result<i64, &str> {
|
||||
// Ignore underscores.
|
||||
i64::from_str_radix(raw.replace("_", "").as_str(), 2).map_err(|_| raw)
|
||||
i64::from_str_radix(raw.replace("_", "").as_str(), radix).map_err(|_| raw)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
|
|
@ -56,12 +56,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located<Expr<'a>>) -> &'a
|
|||
| Nested(Float(_))
|
||||
| Int(_)
|
||||
| Nested(Int(_))
|
||||
| HexInt(_)
|
||||
| Nested(HexInt(_))
|
||||
| OctalInt(_)
|
||||
| Nested(OctalInt(_))
|
||||
| BinaryInt(_)
|
||||
| Nested(BinaryInt(_))
|
||||
| NonBase10Int { .. }
|
||||
| Nested(NonBase10Int { .. })
|
||||
| Str(_)
|
||||
| Nested(Str(_))
|
||||
| BlockStr(_)
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
use crate::can::env::Env;
|
||||
use crate::can::num::{
|
||||
finish_parsing_bin, finish_parsing_float, finish_parsing_hex, finish_parsing_int,
|
||||
finish_parsing_oct,
|
||||
};
|
||||
use crate::can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
|
||||
use crate::can::problem::Problem;
|
||||
use crate::can::scope::Scope;
|
||||
use crate::can::symbol::Symbol;
|
||||
|
@ -188,36 +185,20 @@ pub fn canonicalize_pattern<'a>(
|
|||
}
|
||||
},
|
||||
|
||||
&HexIntLiteral(string) => match pattern_type {
|
||||
&NonBase10Literal {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
} => match pattern_type {
|
||||
CaseBranch => {
|
||||
let int = finish_parsing_hex(string)
|
||||
.unwrap_or_else(|_| panic!("TODO handle malformed hex int pattern"));
|
||||
let int = finish_parsing_base(string, *base)
|
||||
.unwrap_or_else(|_| panic!("TODO handle malformed {:?} pattern", base));
|
||||
|
||||
Pattern::IntLiteral(int)
|
||||
}
|
||||
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
|
||||
unsupported_pattern(env, ptype, region)
|
||||
}
|
||||
},
|
||||
|
||||
&OctalIntLiteral(string) => match pattern_type {
|
||||
CaseBranch => {
|
||||
let int = finish_parsing_oct(string)
|
||||
.unwrap_or_else(|_| panic!("TODO handle malformed octal int pattern"));
|
||||
|
||||
Pattern::IntLiteral(int)
|
||||
}
|
||||
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
|
||||
unsupported_pattern(env, ptype, region)
|
||||
}
|
||||
},
|
||||
|
||||
&BinaryIntLiteral(string) => match pattern_type {
|
||||
CaseBranch => {
|
||||
let int = finish_parsing_bin(string)
|
||||
.unwrap_or_else(|_| panic!("TODO handle malformed binary int pattern"));
|
||||
|
||||
Pattern::IntLiteral(int)
|
||||
if *is_negative {
|
||||
Pattern::IntLiteral(-int)
|
||||
} else {
|
||||
Pattern::IntLiteral(int)
|
||||
}
|
||||
}
|
||||
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
|
||||
unsupported_pattern(env, ptype, region)
|
||||
|
@ -298,7 +279,7 @@ fn add_constraints<'a>(
|
|||
},
|
||||
);
|
||||
}
|
||||
IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_) | BinaryIntLiteral(_) => {
|
||||
IntLiteral(_) | NonBase10Literal { .. } => {
|
||||
state.constraints.push(Constraint::Pattern(
|
||||
region,
|
||||
PatternCategory::Int,
|
||||
|
@ -377,9 +358,16 @@ pub fn remove_idents(pattern: &ast::Pattern, idents: &mut ImMap<Ident, (Symbol,
|
|||
// Ignore the newline/comment info; it doesn't matter in canonicalization.
|
||||
remove_idents(pattern, idents)
|
||||
}
|
||||
GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | BinaryIntLiteral(_)
|
||||
| OctalIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_)
|
||||
| EmptyRecordLiteral | Malformed(_) | Underscore => {}
|
||||
GlobalTag(_)
|
||||
| PrivateTag(_)
|
||||
| IntLiteral(_)
|
||||
| NonBase10Literal { .. }
|
||||
| FloatLiteral(_)
|
||||
| StrLiteral(_)
|
||||
| BlockStrLiteral(_)
|
||||
| EmptyRecordLiteral
|
||||
| Malformed(_)
|
||||
| Underscore => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -439,8 +427,15 @@ fn add_idents_from_pattern<'a>(
|
|||
// Ignore the newline/comment info; it doesn't matter in canonicalization.
|
||||
add_idents_from_pattern(region, pattern, scope, answer)
|
||||
}
|
||||
GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_)
|
||||
| BinaryIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_)
|
||||
| EmptyRecordLiteral | Malformed(_) | Underscore => (),
|
||||
GlobalTag(_)
|
||||
| PrivateTag(_)
|
||||
| IntLiteral(_)
|
||||
| NonBase10Literal { .. }
|
||||
| FloatLiteral(_)
|
||||
| StrLiteral(_)
|
||||
| BlockStrLiteral(_)
|
||||
| EmptyRecordLiteral
|
||||
| Malformed(_)
|
||||
| Underscore => (),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::fmt::def::fmt_def;
|
||||
use crate::fmt::pattern::fmt_pattern;
|
||||
use crate::fmt::spaces::{add_spaces, fmt_comments_only, fmt_spaces, newline, INDENT};
|
||||
use crate::parse::ast::{AssignedField, Expr, Pattern};
|
||||
use crate::parse::ast::{AssignedField, Base, Expr, Pattern};
|
||||
use crate::region::Located;
|
||||
use bumpalo::collections::{String, Vec};
|
||||
|
||||
|
@ -66,19 +66,23 @@ pub fn fmt_expr<'a>(
|
|||
}
|
||||
Int(string) => buf.push_str(string),
|
||||
Float(string) => buf.push_str(string),
|
||||
HexInt(string) => {
|
||||
NonBase10Int {
|
||||
base,
|
||||
string,
|
||||
is_negative,
|
||||
} => {
|
||||
if *is_negative {
|
||||
buf.push('-');
|
||||
}
|
||||
|
||||
buf.push('0');
|
||||
buf.push('x');
|
||||
buf.push_str(string);
|
||||
}
|
||||
BinaryInt(string) => {
|
||||
buf.push('0');
|
||||
buf.push('b');
|
||||
buf.push_str(string);
|
||||
}
|
||||
OctalInt(string) => {
|
||||
buf.push('0');
|
||||
buf.push('o');
|
||||
|
||||
buf.push(match base {
|
||||
Base::Hex => 'x',
|
||||
Base::Octal => 'o',
|
||||
Base::Binary => 'b',
|
||||
});
|
||||
|
||||
buf.push_str(string);
|
||||
}
|
||||
Record(loc_fields) => {
|
||||
|
@ -247,9 +251,7 @@ pub fn is_multiline_expr<'a>(expr: &'a Expr<'a>) -> bool {
|
|||
// These expressions never have newlines
|
||||
Float(_)
|
||||
| Int(_)
|
||||
| HexInt(_)
|
||||
| OctalInt(_)
|
||||
| BinaryInt(_)
|
||||
| NonBase10Int { .. }
|
||||
| Str(_)
|
||||
| Access(_, _)
|
||||
| AccessorFunction(_)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::fmt::spaces::fmt_spaces;
|
||||
use crate::parse::ast::Pattern;
|
||||
use crate::parse::ast::{Base, Pattern};
|
||||
use bumpalo::collections::String;
|
||||
|
||||
pub fn fmt_pattern<'a>(
|
||||
|
@ -56,9 +56,25 @@ pub fn fmt_pattern<'a>(
|
|||
}
|
||||
|
||||
IntLiteral(string) => buf.push_str(string),
|
||||
HexIntLiteral(string) => buf.push_str(string),
|
||||
OctalIntLiteral(string) => buf.push_str(string),
|
||||
BinaryIntLiteral(string) => buf.push_str(string),
|
||||
NonBase10Literal {
|
||||
base,
|
||||
string,
|
||||
is_negative,
|
||||
} => {
|
||||
if *is_negative {
|
||||
buf.push('-');
|
||||
}
|
||||
|
||||
buf.push('0');
|
||||
|
||||
buf.push(match base {
|
||||
Base::Hex => 'x',
|
||||
Base::Octal => 'o',
|
||||
Base::Binary => 'b',
|
||||
});
|
||||
|
||||
buf.push_str(string);
|
||||
}
|
||||
FloatLiteral(string) => buf.push_str(string),
|
||||
StrLiteral(string) => buf.push_str(string),
|
||||
BlockStrLiteral(lines) => {
|
||||
|
|
|
@ -115,9 +115,11 @@ pub enum Expr<'a> {
|
|||
// Number Literals
|
||||
Float(&'a str),
|
||||
Int(&'a str),
|
||||
HexInt(&'a str),
|
||||
OctalInt(&'a str),
|
||||
BinaryInt(&'a str),
|
||||
NonBase10Int {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
},
|
||||
|
||||
// String Literals
|
||||
Str(&'a str),
|
||||
|
@ -283,9 +285,11 @@ pub enum Pattern<'a> {
|
|||
|
||||
// Literal
|
||||
IntLiteral(&'a str),
|
||||
HexIntLiteral(&'a str),
|
||||
OctalIntLiteral(&'a str),
|
||||
BinaryIntLiteral(&'a str),
|
||||
NonBase10Literal {
|
||||
string: &'a str,
|
||||
base: Base,
|
||||
is_negative: bool,
|
||||
},
|
||||
FloatLiteral(&'a str),
|
||||
StrLiteral(&'a str),
|
||||
BlockStrLiteral(&'a [&'a str]),
|
||||
|
@ -301,6 +305,13 @@ pub enum Pattern<'a> {
|
|||
QualifiedIdentifier(MaybeQualified<'a, &'a str>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
pub enum Base {
|
||||
Octal,
|
||||
Binary,
|
||||
Hex,
|
||||
}
|
||||
|
||||
impl<'a> Pattern<'a> {
|
||||
pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> {
|
||||
match ident {
|
||||
|
|
|
@ -302,9 +302,15 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
|
|||
|
||||
Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
|
||||
Expr::Int(string) => Ok(Pattern::IntLiteral(string)),
|
||||
Expr::HexInt(string) => Ok(Pattern::HexIntLiteral(string)),
|
||||
Expr::OctalInt(string) => Ok(Pattern::OctalIntLiteral(string)),
|
||||
Expr::BinaryInt(string) => Ok(Pattern::BinaryIntLiteral(string)),
|
||||
Expr::NonBase10Int {
|
||||
string,
|
||||
base,
|
||||
is_negative,
|
||||
} => Ok(Pattern::NonBase10Literal {
|
||||
string,
|
||||
base: *base,
|
||||
is_negative: *is_negative,
|
||||
}),
|
||||
Expr::Str(string) => Ok(Pattern::StrLiteral(string)),
|
||||
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::parse::ast::{Attempting, Expr};
|
||||
use crate::parse::ast::{Attempting, Base, Expr};
|
||||
use crate::parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||
use std::char;
|
||||
|
||||
|
@ -39,27 +39,25 @@ where
|
|||
let mut typ = Int;
|
||||
|
||||
// We already parsed 1 character (which may have been a minus sign).
|
||||
let mut chars_parsed = 1;
|
||||
let mut bytes_parsed = 1;
|
||||
let mut prev_ch = first_ch;
|
||||
let mut has_parsed_digits = first_ch.is_ascii_digit();
|
||||
|
||||
for next_ch in chars {
|
||||
let err_unexpected = || {
|
||||
Err(unexpected(
|
||||
next_ch,
|
||||
chars_parsed,
|
||||
bytes_parsed,
|
||||
state.clone(),
|
||||
Attempting::NumberLiteral,
|
||||
))
|
||||
};
|
||||
|
||||
// Returns true iff so far we have parsed the given char and no other chars.
|
||||
let so_far_parsed = |ch| chars_parsed == 1 && first_ch == ch;
|
||||
let is_potentially_non_base10 = || {
|
||||
(bytes_parsed == 1 && first_ch == '0')
|
||||
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0')
|
||||
};
|
||||
|
||||
// We don't support negative escaped ints (e.g. 0x01 is supported but -0x01 is not).
|
||||
// If you want that, do something like (negate 0x01).
|
||||
//
|
||||
// I'm open to changing this policy (that is, allowing support for
|
||||
// negative escaped ints), but it'll complicate parsing logic and seems
|
||||
// nonessential, so I'm leaving it out for now.
|
||||
if next_ch == '.' {
|
||||
if typ == Float {
|
||||
// You only get one decimal point!
|
||||
|
@ -68,51 +66,80 @@ where
|
|||
typ = Float;
|
||||
}
|
||||
} else if next_ch == 'x' {
|
||||
if so_far_parsed('0') {
|
||||
if is_potentially_non_base10() {
|
||||
typ = Hex;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
}
|
||||
} else if next_ch == 'b' {
|
||||
if so_far_parsed('0') {
|
||||
} else if next_ch == 'b' && typ == Int {
|
||||
// We have to check for typ == Int because otherwise we get a false
|
||||
// positive here when parsing a hex literal that happens to have
|
||||
// a 'b' in it, e.g. 0xbbbb
|
||||
if is_potentially_non_base10() {
|
||||
typ = Binary;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
}
|
||||
} else if next_ch == 'o' {
|
||||
if so_far_parsed('0') {
|
||||
if is_potentially_non_base10() {
|
||||
typ = Octal;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
}
|
||||
} else if !next_ch.is_ascii_digit() && next_ch != '_' {
|
||||
if so_far_parsed('-') {
|
||||
// No digits! We likely parsed a minus sign that's actually an operator.
|
||||
return err_unexpected();
|
||||
} else {
|
||||
} else if next_ch.is_ascii_digit() {
|
||||
has_parsed_digits = true;
|
||||
} else if next_ch != '_' &&
|
||||
// ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
|
||||
// We parse them in any int literal, so we can give a more helpful error
|
||||
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
|
||||
// or "the character 'g' is outside the range of valid Hex literals")
|
||||
!next_ch.is_ascii_alphabetic()
|
||||
{
|
||||
if has_parsed_digits {
|
||||
// We hit an invalid number literal character; we're done!
|
||||
break;
|
||||
} else {
|
||||
// No digits! We likely parsed a minus sign that's actually an operator.
|
||||
return err_unexpected();
|
||||
}
|
||||
}
|
||||
|
||||
chars_parsed += 1;
|
||||
// Since we only consume characters in the ASCII range for number literals,
|
||||
// this will always be exactly 1. There's no need to call next_ch.utf8_len().
|
||||
bytes_parsed += 1;
|
||||
prev_ch = next_ch;
|
||||
}
|
||||
|
||||
let from_base = |base| {
|
||||
let is_negative = first_ch == '-';
|
||||
let string = if is_negative {
|
||||
&state.input[3..bytes_parsed]
|
||||
} else {
|
||||
&state.input[2..bytes_parsed]
|
||||
};
|
||||
|
||||
Expr::NonBase10Int {
|
||||
is_negative,
|
||||
string,
|
||||
base,
|
||||
}
|
||||
};
|
||||
|
||||
// At this point we have a number, and will definitely succeed.
|
||||
// If the number is malformed (outside the supported range),
|
||||
// we'll succeed with an appropriate Expr which records that.
|
||||
let expr = match typ {
|
||||
Int => Expr::Int(&state.input[0..chars_parsed]),
|
||||
Float => Expr::Float(&state.input[0..chars_parsed]),
|
||||
Int => Expr::Int(&state.input[0..bytes_parsed]),
|
||||
Float => Expr::Float(&state.input[0..bytes_parsed]),
|
||||
// For these we trim off the 0x/0o/0b part
|
||||
Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]),
|
||||
Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]),
|
||||
Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]),
|
||||
Hex => from_base(Base::Hex),
|
||||
Octal => from_base(Base::Octal),
|
||||
Binary => from_base(Base::Binary),
|
||||
};
|
||||
|
||||
let next_state = state.advance_without_indenting(chars_parsed)?;
|
||||
let next_state = state.advance_without_indenting(bytes_parsed)?;
|
||||
|
||||
Ok((expr, next_state))
|
||||
Ok((dbg!(expr), next_state))
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue