diff --git a/src/can/num.rs b/src/can/num.rs index 1da73a66cf..3a85aebd90 100644 --- a/src/can/num.rs +++ b/src/can/num.rs @@ -3,6 +3,7 @@ use crate::can::expr::Expr; use crate::can::problem::Problem; use crate::can::problem::RuntimeError::*; use crate::constrain; +use crate::parse::ast::Base; use crate::region::Region; use crate::subs::VarStore; use crate::types::Constraint::{self, *}; @@ -63,21 +64,15 @@ pub fn finish_parsing_int(raw: &str) -> Result { } #[inline(always)] -pub fn finish_parsing_hex(raw: &str) -> Result { - // Ignore underscores. - i64::from_str_radix(raw.replace("_", "").as_str(), 16).map_err(|_| raw) -} +pub fn finish_parsing_base(raw: &str, base: Base) -> Result { + let radix = match base { + Base::Hex => 16, + Base::Octal => 8, + Base::Binary => 2, + }; -#[inline(always)] -pub fn finish_parsing_oct(raw: &str) -> Result { // Ignore underscores. - i64::from_str_radix(raw.replace("_", "").as_str(), 8).map_err(|_| raw) -} - -#[inline(always)] -pub fn finish_parsing_bin(raw: &str) -> Result { - // Ignore underscores. - i64::from_str_radix(raw.replace("_", "").as_str(), 2).map_err(|_| raw) + i64::from_str_radix(raw.replace("_", "").as_str(), radix).map_err(|_| raw) } #[inline(always)] diff --git a/src/can/operator.rs b/src/can/operator.rs index 0ad38404cc..0556b55503 100644 --- a/src/can/operator.rs +++ b/src/can/operator.rs @@ -56,12 +56,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located>) -> &'a | Nested(Float(_)) | Int(_) | Nested(Int(_)) - | HexInt(_) - | Nested(HexInt(_)) - | OctalInt(_) - | Nested(OctalInt(_)) - | BinaryInt(_) - | Nested(BinaryInt(_)) + | NonBase10Int { .. } + | Nested(NonBase10Int { .. }) | Str(_) | Nested(Str(_)) | BlockStr(_) diff --git a/src/can/pattern.rs b/src/can/pattern.rs index d1f84450d7..062c375e3a 100644 --- a/src/can/pattern.rs +++ b/src/can/pattern.rs @@ -1,8 +1,5 @@ use crate::can::env::Env; -use crate::can::num::{ - finish_parsing_bin, finish_parsing_float, finish_parsing_hex, finish_parsing_int, - finish_parsing_oct, -}; +use crate::can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int}; use crate::can::problem::Problem; use crate::can::scope::Scope; use crate::can::symbol::Symbol; @@ -188,36 +185,20 @@ pub fn canonicalize_pattern<'a>( } }, - &HexIntLiteral(string) => match pattern_type { + &NonBase10Literal { + string, + base, + is_negative, + } => match pattern_type { CaseBranch => { - let int = finish_parsing_hex(string) - .unwrap_or_else(|_| panic!("TODO handle malformed hex int pattern")); + let int = finish_parsing_base(string, *base) + .unwrap_or_else(|_| panic!("TODO handle malformed {:?} pattern", base)); - Pattern::IntLiteral(int) - } - ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => { - unsupported_pattern(env, ptype, region) - } - }, - - &OctalIntLiteral(string) => match pattern_type { - CaseBranch => { - let int = finish_parsing_oct(string) - .unwrap_or_else(|_| panic!("TODO handle malformed octal int pattern")); - - Pattern::IntLiteral(int) - } - ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => { - unsupported_pattern(env, ptype, region) - } - }, - - &BinaryIntLiteral(string) => match pattern_type { - CaseBranch => { - let int = finish_parsing_bin(string) - .unwrap_or_else(|_| panic!("TODO handle malformed binary int pattern")); - - Pattern::IntLiteral(int) + if *is_negative { + Pattern::IntLiteral(-int) + } else { + Pattern::IntLiteral(int) + } } ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => { unsupported_pattern(env, ptype, region) @@ -298,7 +279,7 @@ fn add_constraints<'a>( }, ); } - IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_) | BinaryIntLiteral(_) => { + IntLiteral(_) | NonBase10Literal { .. } => { state.constraints.push(Constraint::Pattern( region, PatternCategory::Int, @@ -377,9 +358,16 @@ pub fn remove_idents(pattern: &ast::Pattern, idents: &mut ImMap {} + GlobalTag(_) + | PrivateTag(_) + | IntLiteral(_) + | NonBase10Literal { .. } + | FloatLiteral(_) + | StrLiteral(_) + | BlockStrLiteral(_) + | EmptyRecordLiteral + | Malformed(_) + | Underscore => {} } } @@ -439,8 +427,15 @@ fn add_idents_from_pattern<'a>( // Ignore the newline/comment info; it doesn't matter in canonicalization. add_idents_from_pattern(region, pattern, scope, answer) } - GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_) - | BinaryIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_) - | EmptyRecordLiteral | Malformed(_) | Underscore => (), + GlobalTag(_) + | PrivateTag(_) + | IntLiteral(_) + | NonBase10Literal { .. } + | FloatLiteral(_) + | StrLiteral(_) + | BlockStrLiteral(_) + | EmptyRecordLiteral + | Malformed(_) + | Underscore => (), } } diff --git a/src/fmt/expr.rs b/src/fmt/expr.rs index 082224a3fd..f9227ec5b9 100644 --- a/src/fmt/expr.rs +++ b/src/fmt/expr.rs @@ -1,7 +1,7 @@ use crate::fmt::def::fmt_def; use crate::fmt::pattern::fmt_pattern; use crate::fmt::spaces::{add_spaces, fmt_comments_only, fmt_spaces, newline, INDENT}; -use crate::parse::ast::{AssignedField, Expr, Pattern}; +use crate::parse::ast::{AssignedField, Base, Expr, Pattern}; use crate::region::Located; use bumpalo::collections::{String, Vec}; @@ -66,19 +66,23 @@ pub fn fmt_expr<'a>( } Int(string) => buf.push_str(string), Float(string) => buf.push_str(string), - HexInt(string) => { + NonBase10Int { + base, + string, + is_negative, + } => { + if *is_negative { + buf.push('-'); + } + buf.push('0'); - buf.push('x'); - buf.push_str(string); - } - BinaryInt(string) => { - buf.push('0'); - buf.push('b'); - buf.push_str(string); - } - OctalInt(string) => { - buf.push('0'); - buf.push('o'); + + buf.push(match base { + Base::Hex => 'x', + Base::Octal => 'o', + Base::Binary => 'b', + }); + buf.push_str(string); } Record(loc_fields) => { @@ -247,9 +251,7 @@ pub fn is_multiline_expr<'a>(expr: &'a Expr<'a>) -> bool { // These expressions never have newlines Float(_) | Int(_) - | HexInt(_) - | OctalInt(_) - | BinaryInt(_) + | NonBase10Int { .. } | Str(_) | Access(_, _) | AccessorFunction(_) diff --git a/src/fmt/pattern.rs b/src/fmt/pattern.rs index 41f74b0642..8ab9c109ad 100644 --- a/src/fmt/pattern.rs +++ b/src/fmt/pattern.rs @@ -1,5 +1,5 @@ use crate::fmt::spaces::fmt_spaces; -use crate::parse::ast::Pattern; +use crate::parse::ast::{Base, Pattern}; use bumpalo::collections::String; pub fn fmt_pattern<'a>( @@ -56,9 +56,25 @@ pub fn fmt_pattern<'a>( } IntLiteral(string) => buf.push_str(string), - HexIntLiteral(string) => buf.push_str(string), - OctalIntLiteral(string) => buf.push_str(string), - BinaryIntLiteral(string) => buf.push_str(string), + NonBase10Literal { + base, + string, + is_negative, + } => { + if *is_negative { + buf.push('-'); + } + + buf.push('0'); + + buf.push(match base { + Base::Hex => 'x', + Base::Octal => 'o', + Base::Binary => 'b', + }); + + buf.push_str(string); + } FloatLiteral(string) => buf.push_str(string), StrLiteral(string) => buf.push_str(string), BlockStrLiteral(lines) => { diff --git a/src/parse/ast.rs b/src/parse/ast.rs index 8ed09db6f0..149b37ef39 100644 --- a/src/parse/ast.rs +++ b/src/parse/ast.rs @@ -115,9 +115,11 @@ pub enum Expr<'a> { // Number Literals Float(&'a str), Int(&'a str), - HexInt(&'a str), - OctalInt(&'a str), - BinaryInt(&'a str), + NonBase10Int { + string: &'a str, + base: Base, + is_negative: bool, + }, // String Literals Str(&'a str), @@ -283,9 +285,11 @@ pub enum Pattern<'a> { // Literal IntLiteral(&'a str), - HexIntLiteral(&'a str), - OctalIntLiteral(&'a str), - BinaryIntLiteral(&'a str), + NonBase10Literal { + string: &'a str, + base: Base, + is_negative: bool, + }, FloatLiteral(&'a str), StrLiteral(&'a str), BlockStrLiteral(&'a [&'a str]), @@ -301,6 +305,13 @@ pub enum Pattern<'a> { QualifiedIdentifier(MaybeQualified<'a, &'a str>), } +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum Base { + Octal, + Binary, + Hex, +} + impl<'a> Pattern<'a> { pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> { match ident { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index a8a24cf630..6ad3e1e7f5 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -302,9 +302,15 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result, Expr::Float(string) => Ok(Pattern::FloatLiteral(string)), Expr::Int(string) => Ok(Pattern::IntLiteral(string)), - Expr::HexInt(string) => Ok(Pattern::HexIntLiteral(string)), - Expr::OctalInt(string) => Ok(Pattern::OctalIntLiteral(string)), - Expr::BinaryInt(string) => Ok(Pattern::BinaryIntLiteral(string)), + Expr::NonBase10Int { + string, + base, + is_negative, + } => Ok(Pattern::NonBase10Literal { + string, + base: *base, + is_negative: *is_negative, + }), Expr::Str(string) => Ok(Pattern::StrLiteral(string)), Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)), diff --git a/src/parse/number_literal.rs b/src/parse/number_literal.rs index ed0edc3148..4130e838ec 100644 --- a/src/parse/number_literal.rs +++ b/src/parse/number_literal.rs @@ -1,4 +1,4 @@ -use crate::parse::ast::{Attempting, Expr}; +use crate::parse::ast::{Attempting, Base, Expr}; use crate::parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; use std::char; @@ -39,27 +39,25 @@ where let mut typ = Int; // We already parsed 1 character (which may have been a minus sign). - let mut chars_parsed = 1; + let mut bytes_parsed = 1; + let mut prev_ch = first_ch; + let mut has_parsed_digits = first_ch.is_ascii_digit(); for next_ch in chars { let err_unexpected = || { Err(unexpected( next_ch, - chars_parsed, + bytes_parsed, state.clone(), Attempting::NumberLiteral, )) }; - // Returns true iff so far we have parsed the given char and no other chars. - let so_far_parsed = |ch| chars_parsed == 1 && first_ch == ch; + let is_potentially_non_base10 = || { + (bytes_parsed == 1 && first_ch == '0') + || (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0') + }; - // We don't support negative escaped ints (e.g. 0x01 is supported but -0x01 is not). - // If you want that, do something like (negate 0x01). - // - // I'm open to changing this policy (that is, allowing support for - // negative escaped ints), but it'll complicate parsing logic and seems - // nonessential, so I'm leaving it out for now. if next_ch == '.' { if typ == Float { // You only get one decimal point! @@ -68,51 +66,80 @@ where typ = Float; } } else if next_ch == 'x' { - if so_far_parsed('0') { + if is_potentially_non_base10() { typ = Hex; } else { return err_unexpected(); } - } else if next_ch == 'b' { - if so_far_parsed('0') { + } else if next_ch == 'b' && typ == Int { + // We have to check for typ == Int because otherwise we get a false + // positive here when parsing a hex literal that happens to have + // a 'b' in it, e.g. 0xbbbb + if is_potentially_non_base10() { typ = Binary; } else { return err_unexpected(); } } else if next_ch == 'o' { - if so_far_parsed('0') { + if is_potentially_non_base10() { typ = Octal; } else { return err_unexpected(); } - } else if !next_ch.is_ascii_digit() && next_ch != '_' { - if so_far_parsed('-') { - // No digits! We likely parsed a minus sign that's actually an operator. - return err_unexpected(); - } else { + } else if next_ch.is_ascii_digit() { + has_parsed_digits = true; + } else if next_ch != '_' && + // ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals. + // We parse them in any int literal, so we can give a more helpful error + // in canonicalization (e.g. "the character 'f' is not allowed in Octal literals" + // or "the character 'g' is outside the range of valid Hex literals") + !next_ch.is_ascii_alphabetic() + { + if has_parsed_digits { // We hit an invalid number literal character; we're done! break; + } else { + // No digits! We likely parsed a minus sign that's actually an operator. + return err_unexpected(); } } - chars_parsed += 1; + // Since we only consume characters in the ASCII range for number literals, + // this will always be exactly 1. There's no need to call next_ch.utf8_len(). + bytes_parsed += 1; + prev_ch = next_ch; } + let from_base = |base| { + let is_negative = first_ch == '-'; + let string = if is_negative { + &state.input[3..bytes_parsed] + } else { + &state.input[2..bytes_parsed] + }; + + Expr::NonBase10Int { + is_negative, + string, + base, + } + }; + // At this point we have a number, and will definitely succeed. // If the number is malformed (outside the supported range), // we'll succeed with an appropriate Expr which records that. let expr = match typ { - Int => Expr::Int(&state.input[0..chars_parsed]), - Float => Expr::Float(&state.input[0..chars_parsed]), + Int => Expr::Int(&state.input[0..bytes_parsed]), + Float => Expr::Float(&state.input[0..bytes_parsed]), // For these we trim off the 0x/0o/0b part - Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]), - Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]), - Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]), + Hex => from_base(Base::Hex), + Octal => from_base(Base::Octal), + Binary => from_base(Base::Binary), }; - let next_state = state.advance_without_indenting(chars_parsed)?; + let next_state = state.advance_without_indenting(bytes_parsed)?; - Ok((expr, next_state)) + Ok((dbg!(expr), next_state)) } #[derive(Debug, PartialEq, Eq)]