Fix parsing hex/octal/binary literals

This commit is contained in:
Richard Feldman 2019-12-07 02:27:12 -05:00
parent 37422c6a76
commit 2710dcb95c
8 changed files with 163 additions and 115 deletions

View file

@ -3,6 +3,7 @@ use crate::can::expr::Expr;
use crate::can::problem::Problem;
use crate::can::problem::RuntimeError::*;
use crate::constrain;
use crate::parse::ast::Base;
use crate::region::Region;
use crate::subs::VarStore;
use crate::types::Constraint::{self, *};
@ -63,21 +64,15 @@ pub fn finish_parsing_int(raw: &str) -> Result<i64, &str> {
}
#[inline(always)]
pub fn finish_parsing_hex(raw: &str) -> Result<i64, &str> {
// Ignore underscores.
i64::from_str_radix(raw.replace("_", "").as_str(), 16).map_err(|_| raw)
}
pub fn finish_parsing_base(raw: &str, base: Base) -> Result<i64, &str> {
let radix = match base {
Base::Hex => 16,
Base::Octal => 8,
Base::Binary => 2,
};
#[inline(always)]
pub fn finish_parsing_oct(raw: &str) -> Result<i64, &str> {
// Ignore underscores.
i64::from_str_radix(raw.replace("_", "").as_str(), 8).map_err(|_| raw)
}
#[inline(always)]
pub fn finish_parsing_bin(raw: &str) -> Result<i64, &str> {
// Ignore underscores.
i64::from_str_radix(raw.replace("_", "").as_str(), 2).map_err(|_| raw)
i64::from_str_radix(raw.replace("_", "").as_str(), radix).map_err(|_| raw)
}
#[inline(always)]

View file

@ -56,12 +56,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located<Expr<'a>>) -> &'a
| Nested(Float(_))
| Int(_)
| Nested(Int(_))
| HexInt(_)
| Nested(HexInt(_))
| OctalInt(_)
| Nested(OctalInt(_))
| BinaryInt(_)
| Nested(BinaryInt(_))
| NonBase10Int { .. }
| Nested(NonBase10Int { .. })
| Str(_)
| Nested(Str(_))
| BlockStr(_)

View file

@ -1,8 +1,5 @@
use crate::can::env::Env;
use crate::can::num::{
finish_parsing_bin, finish_parsing_float, finish_parsing_hex, finish_parsing_int,
finish_parsing_oct,
};
use crate::can::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
use crate::can::problem::Problem;
use crate::can::scope::Scope;
use crate::can::symbol::Symbol;
@ -188,36 +185,20 @@ pub fn canonicalize_pattern<'a>(
}
},
&HexIntLiteral(string) => match pattern_type {
&NonBase10Literal {
string,
base,
is_negative,
} => match pattern_type {
CaseBranch => {
let int = finish_parsing_hex(string)
.unwrap_or_else(|_| panic!("TODO handle malformed hex int pattern"));
let int = finish_parsing_base(string, *base)
.unwrap_or_else(|_| panic!("TODO handle malformed {:?} pattern", base));
Pattern::IntLiteral(int)
}
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
unsupported_pattern(env, ptype, region)
}
},
&OctalIntLiteral(string) => match pattern_type {
CaseBranch => {
let int = finish_parsing_oct(string)
.unwrap_or_else(|_| panic!("TODO handle malformed octal int pattern"));
Pattern::IntLiteral(int)
}
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
unsupported_pattern(env, ptype, region)
}
},
&BinaryIntLiteral(string) => match pattern_type {
CaseBranch => {
let int = finish_parsing_bin(string)
.unwrap_or_else(|_| panic!("TODO handle malformed binary int pattern"));
Pattern::IntLiteral(int)
if *is_negative {
Pattern::IntLiteral(-int)
} else {
Pattern::IntLiteral(int)
}
}
ptype @ Assignment | ptype @ TopLevelDef | ptype @ FunctionArg => {
unsupported_pattern(env, ptype, region)
@ -298,7 +279,7 @@ fn add_constraints<'a>(
},
);
}
IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_) | BinaryIntLiteral(_) => {
IntLiteral(_) | NonBase10Literal { .. } => {
state.constraints.push(Constraint::Pattern(
region,
PatternCategory::Int,
@ -377,9 +358,16 @@ pub fn remove_idents(pattern: &ast::Pattern, idents: &mut ImMap<Ident, (Symbol,
// Ignore the newline/comment info; it doesn't matter in canonicalization.
remove_idents(pattern, idents)
}
GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | BinaryIntLiteral(_)
| OctalIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_)
| EmptyRecordLiteral | Malformed(_) | Underscore => {}
GlobalTag(_)
| PrivateTag(_)
| IntLiteral(_)
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_)
| EmptyRecordLiteral
| Malformed(_)
| Underscore => {}
}
}
@ -439,8 +427,15 @@ fn add_idents_from_pattern<'a>(
// Ignore the newline/comment info; it doesn't matter in canonicalization.
add_idents_from_pattern(region, pattern, scope, answer)
}
GlobalTag(_) | PrivateTag(_) | IntLiteral(_) | HexIntLiteral(_) | OctalIntLiteral(_)
| BinaryIntLiteral(_) | FloatLiteral(_) | StrLiteral(_) | BlockStrLiteral(_)
| EmptyRecordLiteral | Malformed(_) | Underscore => (),
GlobalTag(_)
| PrivateTag(_)
| IntLiteral(_)
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_)
| EmptyRecordLiteral
| Malformed(_)
| Underscore => (),
}
}

View file

@ -1,7 +1,7 @@
use crate::fmt::def::fmt_def;
use crate::fmt::pattern::fmt_pattern;
use crate::fmt::spaces::{add_spaces, fmt_comments_only, fmt_spaces, newline, INDENT};
use crate::parse::ast::{AssignedField, Expr, Pattern};
use crate::parse::ast::{AssignedField, Base, Expr, Pattern};
use crate::region::Located;
use bumpalo::collections::{String, Vec};
@ -66,19 +66,23 @@ pub fn fmt_expr<'a>(
}
Int(string) => buf.push_str(string),
Float(string) => buf.push_str(string),
HexInt(string) => {
NonBase10Int {
base,
string,
is_negative,
} => {
if *is_negative {
buf.push('-');
}
buf.push('0');
buf.push('x');
buf.push_str(string);
}
BinaryInt(string) => {
buf.push('0');
buf.push('b');
buf.push_str(string);
}
OctalInt(string) => {
buf.push('0');
buf.push('o');
buf.push(match base {
Base::Hex => 'x',
Base::Octal => 'o',
Base::Binary => 'b',
});
buf.push_str(string);
}
Record(loc_fields) => {
@ -247,9 +251,7 @@ pub fn is_multiline_expr<'a>(expr: &'a Expr<'a>) -> bool {
// These expressions never have newlines
Float(_)
| Int(_)
| HexInt(_)
| OctalInt(_)
| BinaryInt(_)
| NonBase10Int { .. }
| Str(_)
| Access(_, _)
| AccessorFunction(_)

View file

@ -1,5 +1,5 @@
use crate::fmt::spaces::fmt_spaces;
use crate::parse::ast::Pattern;
use crate::parse::ast::{Base, Pattern};
use bumpalo::collections::String;
pub fn fmt_pattern<'a>(
@ -56,9 +56,25 @@ pub fn fmt_pattern<'a>(
}
IntLiteral(string) => buf.push_str(string),
HexIntLiteral(string) => buf.push_str(string),
OctalIntLiteral(string) => buf.push_str(string),
BinaryIntLiteral(string) => buf.push_str(string),
NonBase10Literal {
base,
string,
is_negative,
} => {
if *is_negative {
buf.push('-');
}
buf.push('0');
buf.push(match base {
Base::Hex => 'x',
Base::Octal => 'o',
Base::Binary => 'b',
});
buf.push_str(string);
}
FloatLiteral(string) => buf.push_str(string),
StrLiteral(string) => buf.push_str(string),
BlockStrLiteral(lines) => {

View file

@ -115,9 +115,11 @@ pub enum Expr<'a> {
// Number Literals
Float(&'a str),
Int(&'a str),
HexInt(&'a str),
OctalInt(&'a str),
BinaryInt(&'a str),
NonBase10Int {
string: &'a str,
base: Base,
is_negative: bool,
},
// String Literals
Str(&'a str),
@ -283,9 +285,11 @@ pub enum Pattern<'a> {
// Literal
IntLiteral(&'a str),
HexIntLiteral(&'a str),
OctalIntLiteral(&'a str),
BinaryIntLiteral(&'a str),
NonBase10Literal {
string: &'a str,
base: Base,
is_negative: bool,
},
FloatLiteral(&'a str),
StrLiteral(&'a str),
BlockStrLiteral(&'a [&'a str]),
@ -301,6 +305,13 @@ pub enum Pattern<'a> {
QualifiedIdentifier(MaybeQualified<'a, &'a str>),
}
/// Radix of an integer literal written with a `0x`, `0o` or `0b` prefix.
///
/// Carried by `Expr::NonBase10Int` and `Pattern::NonBase10Literal` alongside
/// the literal's digit string and its `is_negative` flag.
///
/// NOTE: `PartialOrd`/`Ord` are derived, so the declaration order of the
/// variants below determines how `Base` values compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum Base {
/// Base 8; the formatter writes these literals with a `0o` prefix.
Octal,
/// Base 2; the formatter writes these literals with a `0b` prefix.
Binary,
/// Base 16; the formatter writes these literals with a `0x` prefix.
Hex,
}
impl<'a> Pattern<'a> {
pub fn from_ident(arena: &'a Bump, ident: Ident<'a>) -> Pattern<'a> {
match ident {

View file

@ -302,9 +302,15 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
Expr::Float(string) => Ok(Pattern::FloatLiteral(string)),
Expr::Int(string) => Ok(Pattern::IntLiteral(string)),
Expr::HexInt(string) => Ok(Pattern::HexIntLiteral(string)),
Expr::OctalInt(string) => Ok(Pattern::OctalIntLiteral(string)),
Expr::BinaryInt(string) => Ok(Pattern::BinaryIntLiteral(string)),
Expr::NonBase10Int {
string,
base,
is_negative,
} => Ok(Pattern::NonBase10Literal {
string,
base: *base,
is_negative: *is_negative,
}),
Expr::Str(string) => Ok(Pattern::StrLiteral(string)),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),

View file

@ -1,4 +1,4 @@
use crate::parse::ast::{Attempting, Expr};
use crate::parse::ast::{Attempting, Base, Expr};
use crate::parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char;
@ -39,27 +39,25 @@ where
let mut typ = Int;
// We already parsed 1 character (which may have been a minus sign).
let mut chars_parsed = 1;
let mut bytes_parsed = 1;
let mut prev_ch = first_ch;
let mut has_parsed_digits = first_ch.is_ascii_digit();
for next_ch in chars {
let err_unexpected = || {
Err(unexpected(
next_ch,
chars_parsed,
bytes_parsed,
state.clone(),
Attempting::NumberLiteral,
))
};
// Returns true iff so far we have parsed the given char and no other chars.
let so_far_parsed = |ch| chars_parsed == 1 && first_ch == ch;
let is_potentially_non_base10 = || {
(bytes_parsed == 1 && first_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0')
};
// Negative non-base-10 ints are supported (e.g. both 0x01 and -0x01 parse).
// A leading minus sign is detected through `first_ch`, which is why
// `is_potentially_non_base10` above also accepts a "-0" prefix. The sign is
// recorded as `is_negative` on the resulting `Expr::NonBase10Int`, and the
// stored digit string excludes both the sign and the 0x/0o/0b prefix.
if next_ch == '.' {
if typ == Float {
// You only get one decimal point!
@ -68,51 +66,80 @@ where
typ = Float;
}
} else if next_ch == 'x' {
if so_far_parsed('0') {
if is_potentially_non_base10() {
typ = Hex;
} else {
return err_unexpected();
}
} else if next_ch == 'b' {
if so_far_parsed('0') {
} else if next_ch == 'b' && typ == Int {
// We have to check for typ == Int because otherwise we get a false
// positive here when parsing a hex literal that happens to have
// a 'b' in it, e.g. 0xbbbb
if is_potentially_non_base10() {
typ = Binary;
} else {
return err_unexpected();
}
} else if next_ch == 'o' {
if so_far_parsed('0') {
if is_potentially_non_base10() {
typ = Octal;
} else {
return err_unexpected();
}
} else if !next_ch.is_ascii_digit() && next_ch != '_' {
if so_far_parsed('-') {
// No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected();
} else {
} else if next_ch.is_ascii_digit() {
has_parsed_digits = true;
} else if next_ch != '_' &&
// ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
// We parse them in any int literal, so we can give a more helpful error
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
// or "the character 'g' is outside the range of valid Hex literals")
!next_ch.is_ascii_alphabetic()
{
if has_parsed_digits {
// We hit an invalid number literal character; we're done!
break;
} else {
// No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected();
}
}
chars_parsed += 1;
// Since we only consume characters in the ASCII range for number literals,
// this will always be exactly 1. There's no need to call next_ch.utf8_len().
bytes_parsed += 1;
prev_ch = next_ch;
}
let from_base = |base| {
let is_negative = first_ch == '-';
let string = if is_negative {
&state.input[3..bytes_parsed]
} else {
&state.input[2..bytes_parsed]
};
Expr::NonBase10Int {
is_negative,
string,
base,
}
};
// At this point we have a number, and will definitely succeed.
// If the number is malformed (outside the supported range),
// we'll succeed with an appropriate Expr which records that.
let expr = match typ {
Int => Expr::Int(&state.input[0..chars_parsed]),
Float => Expr::Float(&state.input[0..chars_parsed]),
Int => Expr::Int(&state.input[0..bytes_parsed]),
Float => Expr::Float(&state.input[0..bytes_parsed]),
// For these we trim off the 0x/0o/0b part
Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]),
Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]),
Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]),
Hex => from_base(Base::Hex),
Octal => from_base(Base::Octal),
Binary => from_base(Base::Binary),
};
let next_state = state.advance_without_indenting(chars_parsed)?;
let next_state = state.advance_without_indenting(bytes_parsed)?;
Ok((expr, next_state))
Ok((dbg!(expr), next_state))
}
#[derive(Debug, PartialEq, Eq)]