Fix parsing hex/octal/binary literals

This commit is contained in:
Richard Feldman 2019-12-07 02:27:12 -05:00
parent 37422c6a76
commit 2710dcb95c
8 changed files with 163 additions and 115 deletions

View file

@ -3,6 +3,7 @@ use crate::can::expr::Expr;
use crate::can::problem::Problem; use crate::can::problem::Problem;
use crate::can::problem::RuntimeError::*; use crate::can::problem::RuntimeError::*;
use crate::constrain; use crate::constrain;
use crate::parse::ast::Base;
use crate::region::Region; use crate::region::Region;
use crate::subs::VarStore; use crate::subs::VarStore;
use crate::types::Constraint::{self, *}; use crate::types::Constraint::{self, *};
@ -63,21 +64,15 @@ pub fn finish_parsing_int(raw: &str) -> Result<i64, &str> {
} }
#[inline(always)] #[inline(always)]
pub fn finish_parsing_hex(raw: &str) -> Result<i64, &str> { pub fn finish_parsing_base(raw: &str, base: Base) -> Result<i64, &str> {
// Ignore underscores. let radix = match base {
i64::from_str_radix(raw.replace("_", "").as_str(), 16).map_err(|_| raw) Base::Hex => 16,
} Base::Octal => 8,
Base::Binary => 2,
};
#[inline(always)]
pub fn finish_parsing_oct(raw: &str) -> Result<i64, &str> {
// Ignore underscores. // Ignore underscores.
i64::from_str_radix(raw.replace("_", "").as_str(), 8).map_err(|_| raw) i64::from_str_radix(raw.replace("_", "").as_str(), radix).map_err(|_| raw)
}
#[inline(always)]
pub fn finish_parsing_bin(raw: &str) -> Result<i64, &str> {
// Ignore underscores.
i64::from_str_radix(raw.replace("_", "").as_str(), 2).map_err(|_| raw)
} }
#[inline(always)] #[inline(always)]

View file

@ -56,12 +56,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located<Expr<'a>>) -> &'a
| Nested(Float(_)) | Nested(Float(_))
| Int(_) | Int(_)
| Nested(Int(_)) | Nested(Int(_))
| HexInt(_) | NonBase10Int { .. }
| Nested(HexInt(_)) | Nested(NonBase10Int { .. })
| OctalInt(_)
| Nested(OctalInt(_))
| BinaryInt(_)
| Nested(BinaryInt(_))
| Str(_) | Str(_)
| Nested(Str(_)) | Nested(Str(_))
| BlockStr(_) | BlockStr(_)

View file

@ -1,8 +1,5 @@
use crate::can::env::Env; use crate::can::env::Env;
use crate::can::num::{ use crate::can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
finish_parsing_bin, finish_parsing_float, finish_parsing_hex, finish_parsing_int,
finish_parsing_oct,
};
use crate::can::problem::Problem; use crate::can::problem::Problem;
use crate::can::scope::Scope; use crate::can::scope::Scope;
use crate::can::symbol::Symbol; use crate::can::symbol::Symbol;
@ -188,36 +185,20 @@ pub fn canonicalize_pattern<'a>(
} }
}, },
&HexIntLiteral(string) => match pattern_type { &NonBase10Literal {
string,
base,
is_negative,
} => match pattern_type {
CaseBranch => { CaseBranch => {
let int = finish_parsing_hex(string) let int = finish_parsing_base(string, *base)
.unwrap_or_else(|_| panic!("TODO handle malformed hex int pattern")); .unwrap_or_else(|_| panic!("TODO handle malformed {:?} pattern", base));
if *is_negative {
Pattern::IntLiteral(-int)
} else {
Pattern::IntLiteral(int) Pattern::IntLiteral(int)
} }
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
unsupported_pattern(env, ptype, region)
}
},
&OctalIntLiteral(string) => match pattern_type {
CaseBranch => {
let int = finish_parsing_oct(string)
.unwrap_or_else(|_| panic!("TODO handle malformed octal int pattern"));
Pattern::IntLiteral(int)
}
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
unsupported_pattern(env, ptype, region)
}
},
&BinaryIntLiteral(string) => match pattern_type {
CaseBranch => {
let int = finish_parsing_bin(string)
.unwrap_or_else(|_| panic!("TODO handle malformed binary int pattern"));
Pattern::IntLiteral(int)
} }
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => { ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
unsupported_pattern(env, ptype, region) unsupported_pattern(env, ptype, region)
@ -298,7 +279,7 @@ fn add_constraints<'a>(
}, },
); );
} }
IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_) | BinaryIntLiteral(_) => { IntLiteral(_) | NonBase10Literal { .. } => {
state.constraints.push(Constraint::Pattern( state.constraints.push(Constraint::Pattern(
region, region,
PatternCategory::Int, PatternCategory::Int,
@ -377,9 +358,16 @@ pub fn remove_idents(pattern: &ast::Pattern, idents: &mut ImMap<Ident, (Symbol,
// Ignore the newline/comment info; it doesn't matter in canonicalization. // Ignore the newline/comment info; it doesn't matter in canonicalization.
remove_idents(pattern, idents) remove_idents(pattern, idents)
} }
GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | BinaryIntLiteral(_) GlobalTag(_)
| OctalIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_) | PrivateTag(_)
| EmptyRecordLiteral | Malformed(_) | Underscore => {} | IntLiteral(_)
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_)
| EmptyRecordLiteral
| Malformed(_)
| Underscore => {}
} }
} }
@ -439,8 +427,15 @@ fn add_idents_from_pattern<'a>(
// Ignore the newline/comment info; it doesn't matter in canonicalization. // Ignore the newline/comment info; it doesn't matter in canonicalization.
add_idents_from_pattern(region, pattern, scope, answer) add_idents_from_pattern(region, pattern, scope, answer)
} }
GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_) GlobalTag(_)
| BinaryIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_) | PrivateTag(_)
| EmptyRecordLiteral | Malformed(_) | Underscore => (), | IntLiteral(_)
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_)
| EmptyRecordLiteral
| Malformed(_)
| Underscore => (),
} }
} }

View file

@ -1,7 +1,7 @@
use crate::fmt::def::fmt_def; use crate::fmt::def::fmt_def;
use crate::fmt::pattern::fmt_pattern; use crate::fmt::pattern::fmt_pattern;
use crate::fmt::spaces::{add_spaces, fmt_comments_only, fmt_spaces, newline, INDENT}; use crate::fmt::spaces::{add_spaces, fmt_comments_only, fmt_spaces, newline, INDENT};
use crate::parse::ast::{AssignedField, Expr, Pattern}; use crate::parse::ast::{AssignedField, Base, Expr, Pattern};
use crate::region::Located; use crate::region::Located;
use bumpalo::collections::{String, Vec}; use bumpalo::collections::{String, Vec};
@ -66,19 +66,23 @@ pub fn fmt_expr<'a>(
} }
Int(string) => buf.push_str(string), Int(string) => buf.push_str(string),
Float(string) => buf.push_str(string), Float(string) => buf.push_str(string),
HexInt(string) => { NonBase10Int {
buf.push('0'); base,
buf.push('x'); string,
buf.push_str(string); is_negative,
} => {
if *is_negative {
buf.push('-');
} }
BinaryInt(string) => {
buf.push('0'); buf.push('0');
buf.push('b');
buf.push_str(string); buf.push(match base {
} Base::Hex => 'x',
OctalInt(string) => { Base::Octal => 'o',
buf.push('0'); Base::Binary => 'b',
buf.push('o'); });
buf.push_str(string); buf.push_str(string);
} }
Record(loc_fields) => { Record(loc_fields) => {
@ -247,9 +251,7 @@ pub fn is_multiline_expr<'a>(expr: &'a Expr<'a>) -> bool {
// These expressions never have newlines // These expressions never have newlines
Float(_) Float(_)
| Int(_) | Int(_)
| HexInt(_) | NonBase10Int { .. }
| OctalInt(_)
| BinaryInt(_)
| Str(_) | Str(_)
| Access(_, _) | Access(_, _)
| AccessorFunction(_) | AccessorFunction(_)

View file

@ -1,5 +1,5 @@
use crate::fmt::spaces::fmt_spaces; use crate::fmt::spaces::fmt_spaces;
use crate::parse::ast::Pattern; use crate::parse::ast::{Base, Pattern};
use bumpalo::collections::String; use bumpalo::collections::String;
pub fn fmt_pattern<'a>( pub fn fmt_pattern<'a>(
@ -56,9 +56,25 @@ pub fn fmt_pattern<'a>(
} }
IntLiteral(string) => buf.push_str(string), IntLiteral(string) => buf.push_str(string),
HexIntLiteral(string) => buf.push_str(string), NonBase10Literal {
OctalIntLiteral(string) => buf.push_str(string), base,
BinaryIntLiteral(string) => buf.push_str(string), string,
is_negative,
} => {
if *is_negative {
buf.push('-');
}
buf.push('0');
buf.push(match base {
Base::Hex => 'x',
Base::Octal => 'o',
Base::Binary => 'b',
});
buf.push_str(string);
}
FloatLiteral(string) => buf.push_str(string), FloatLiteral(string) => buf.push_str(string),
StrLiteral(string) => buf.push_str(string), StrLiteral(string) => buf.push_str(string),
BlockStrLiteral(lines) => { BlockStrLiteral(lines) => {

View file

@ -115,9 +115,11 @@ pub enum Expr<'a> {
// Number Literals // Number Literals
Float(&'a str), Float(&'a str),
Int(&'a str), Int(&'a str),
HexInt(&'a str), NonBase10Int {
OctalInt(&'a str), string: &'a str,
BinaryInt(&'a str), base: Base,
is_negative: bool,
},
// String Literals // String Literals
Str(&'a str), Str(&'a str),
@ -283,9 +285,11 @@ pub enum Pattern<'a> {
// Literal // Literal
IntLiteral(&'a str), IntLiteral(&'a str),
HexIntLiteral(&'a str), NonBase10Literal {
OctalIntLiteral(&'a str), string: &'a str,
BinaryIntLiteral(&'a str), base: Base,
is_negative: bool,
},
FloatLiteral(&'a str), FloatLiteral(&'a str),
StrLiteral(&'a str), StrLiteral(&'a str),
BlockStrLiteral(&'a [&'a str]), BlockStrLiteral(&'a [&'a str]),
@ -301,6 +305,13 @@ pub enum Pattern<'a> {
QualifiedIdentifier(MaybeQualified<'a, &'a str>), QualifiedIdentifier(MaybeQualified<'a, &'a str>),
} }
/// The radix of an integer literal that is not written in base 10.
///
/// Stored next to the digit string in `Expr::NonBase10Int` and
/// `Pattern::NonBase10Literal`; the `0x`/`0o`/`0b` prefix itself is not part
/// of that string, so this value is what records which prefix was used.
///
/// NOTE: `PartialOrd`/`Ord` are derived, so the declaration order of the
/// variants below determines how `Base` values compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Base {
/// Base 8; formatted with the `0o` prefix.
Octal,
/// Base 2; formatted with the `0b` prefix.
Binary,
/// Base 16; formatted with the `0x` prefix.
Hex,
}
impl<'a> Pattern<'a> { impl<'a> Pattern<'a> {
pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> { pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> {
match ident { match ident {

View file

@ -302,9 +302,15 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
Expr::Float(string) => Ok(Pattern::FloatLiteral(string)), Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
Expr::Int(string) => Ok(Pattern::IntLiteral(string)), Expr::Int(string) => Ok(Pattern::IntLiteral(string)),
Expr::HexInt(string) => Ok(Pattern::HexIntLiteral(string)), Expr::NonBase10Int {
Expr::OctalInt(string) => Ok(Pattern::OctalIntLiteral(string)), string,
Expr::BinaryInt(string) => Ok(Pattern::BinaryIntLiteral(string)), base,
is_negative,
} => Ok(Pattern::NonBase10Literal {
string,
base: *base,
is_negative: *is_negative,
}),
Expr::Str(string) => Ok(Pattern::StrLiteral(string)), Expr::Str(string) => Ok(Pattern::StrLiteral(string)),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)), Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),

View file

@ -1,4 +1,4 @@
use crate::parse::ast::{Attempting, Expr}; use crate::parse::ast::{Attempting, Base, Expr};
use crate::parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; use crate::parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char; use std::char;
@ -39,27 +39,25 @@ where
let mut typ = Int; let mut typ = Int;
// We already parsed 1 character (which may have been a minus sign). // We already parsed 1 character (which may have been a minus sign).
let mut chars_parsed = 1; let mut bytes_parsed = 1;
let mut prev_ch = first_ch;
let mut has_parsed_digits = first_ch.is_ascii_digit();
for next_ch in chars { for next_ch in chars {
let err_unexpected = || { let err_unexpected = || {
Err(unexpected( Err(unexpected(
next_ch, next_ch,
chars_parsed, bytes_parsed,
state.clone(), state.clone(),
Attempting::NumberLiteral, Attempting::NumberLiteral,
)) ))
}; };
// Returns true iff so far we have parsed the given char and no other chars. let is_potentially_non_base10 = || {
let so_far_parsed = |ch| chars_parsed == 1 && first_ch == ch; (bytes_parsed == 1 && first_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0')
};
// We don't support negative escaped ints (e.g. 0x01 is supported but -0x01 is not).
// If you want that, do something like (negate 0x01).
//
// I'm open to changing this policy (that is, allowing support for
// negative escaped ints), but it'll complicate parsing logic and seems
// nonessential, so I'm leaving it out for now.
if next_ch == '.' { if next_ch == '.' {
if typ == Float { if typ == Float {
// You only get one decimal point! // You only get one decimal point!
@ -68,51 +66,80 @@ where
typ = Float; typ = Float;
} }
} else if next_ch == 'x' { } else if next_ch == 'x' {
if so_far_parsed('0') { if is_potentially_non_base10() {
typ = Hex; typ = Hex;
} else { } else {
return err_unexpected(); return err_unexpected();
} }
} else if next_ch == 'b' { } else if next_ch == 'b' && typ == Int {
if so_far_parsed('0') { // We have to check for typ == Int because otherwise we get a false
// positive here when parsing a hex literal that happens to have
// a 'b' in it, e.g. 0xbbbb
if is_potentially_non_base10() {
typ = Binary; typ = Binary;
} else { } else {
return err_unexpected(); return err_unexpected();
} }
} else if next_ch == 'o' { } else if next_ch == 'o' {
if so_far_parsed('0') { if is_potentially_non_base10() {
typ = Octal; typ = Octal;
} else { } else {
return err_unexpected(); return err_unexpected();
} }
} else if !next_ch.is_ascii_digit() && next_ch != '_' { } else if next_ch.is_ascii_digit() {
if so_far_parsed('-') { has_parsed_digits = true;
// No digits! We likely parsed a minus sign that's actually an operator. } else if next_ch != '_' &&
return err_unexpected(); // ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
} else { // We parse them in any int literal, so we can give a more helpful error
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
// or "the character 'g' is outside the range of valid Hex literals")
!next_ch.is_ascii_alphabetic()
{
if has_parsed_digits {
// We hit an invalid number literal character; we're done! // We hit an invalid number literal character; we're done!
break; break;
} else {
// No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected();
} }
} }
chars_parsed += 1; // Since we only consume characters in the ASCII range for number literals,
// this will always be exactly 1. There's no need to call next_ch.utf8_len().
bytes_parsed += 1;
prev_ch = next_ch;
} }
let from_base = |base| {
let is_negative = first_ch == '-';
let string = if is_negative {
&state.input[3..bytes_parsed]
} else {
&state.input[2..bytes_parsed]
};
Expr::NonBase10Int {
is_negative,
string,
base,
}
};
// At this point we have a number, and will definitely succeed. // At this point we have a number, and will definitely succeed.
// If the number is malformed (outside the supported range), // If the number is malformed (outside the supported range),
// we'll succeed with an appropriate Expr which records that. // we'll succeed with an appropriate Expr which records that.
let expr = match typ { let expr = match typ {
Int => Expr::Int(&state.input[0..chars_parsed]), Int => Expr::Int(&state.input[0..bytes_parsed]),
Float => Expr::Float(&state.input[0..chars_parsed]), Float => Expr::Float(&state.input[0..bytes_parsed]),
// For these we trim off the 0x/0o/0b part // For these we trim off the 0x/0o/0b part
Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]), Hex => from_base(Base::Hex),
Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]), Octal => from_base(Base::Octal),
Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]), Binary => from_base(Base::Binary),
}; };
let next_state = state.advance_without_indenting(chars_parsed)?; let next_state = state.advance_without_indenting(bytes_parsed)?;
Ok((expr, next_state)) Ok((dbg!(expr), next_state))
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]