mirror of
https://github.com/roc-lang/roc.git
synced 2025-10-03 08:34:33 +00:00
Attempt at fixing Problems bug
This commit is contained in:
parent
3043862989
commit
fc363c62cf
3 changed files with 114 additions and 52 deletions
|
@ -27,6 +27,6 @@ extern crate im_rc;
|
||||||
extern crate num;
|
extern crate num;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate combine;
|
extern crate combine; // OBSOLETE
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate log;
|
extern crate log;
|
||||||
|
|
106
src/parser.rs
106
src/parser.rs
|
@ -16,7 +16,7 @@ use std::char;
|
||||||
type Loc<T> = region::Located<T>;
|
type Loc<T> = region::Located<T>;
|
||||||
|
|
||||||
/// Struct which represents a position in a source file.
|
/// Struct which represents a position in a source file.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub struct State<'a> {
|
pub struct State<'a> {
|
||||||
/// The raw input string.
|
/// The raw input string.
|
||||||
pub input: &'a str,
|
pub input: &'a str,
|
||||||
|
@ -33,6 +33,28 @@ pub struct State<'a> {
|
||||||
// true at the beginning of each line, then false after encountering
|
// true at the beginning of each line, then false after encountering
|
||||||
// the first nonspace char on that line.
|
// the first nonspace char on that line.
|
||||||
pub is_indenting: bool,
|
pub is_indenting: bool,
|
||||||
|
|
||||||
|
pub problems: Problems<'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> State<'a> {
|
||||||
|
pub fn from_input(input: &'a str, problems: Problems<'a>) -> State<'a> {
|
||||||
|
State {
|
||||||
|
input,
|
||||||
|
problems,
|
||||||
|
line: 0,
|
||||||
|
column: 0,
|
||||||
|
indent_col: 1,
|
||||||
|
is_indenting: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn state_size() {
|
||||||
|
// State should always be under 8 machine words, so it fits in a typical
|
||||||
|
// cache line.
|
||||||
|
assert!(std::mem::size_of::<State>() <= std::mem::size_of::<usize>() * 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
type Problems<'a> = Vec<'a, Located<Problem>>;
|
type Problems<'a> = Vec<'a, Located<Problem>>;
|
||||||
|
@ -175,35 +197,24 @@ fn pattern_size() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>;
|
pub type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>;
|
||||||
|
|
||||||
trait Parser<'a, Output> {
|
pub trait Parser<'a, Output> {
|
||||||
fn parse(
|
fn parse(&self, &'a Bump, &'a mut State<'a>, attempting: Attempting)
|
||||||
&self,
|
-> ParseResult<'a, Output>;
|
||||||
&'a Bump,
|
|
||||||
&'a State<'a>,
|
|
||||||
problems: &'a mut Problems<'a>,
|
|
||||||
attempting: Attempting,
|
|
||||||
) -> ParseResult<'a, Output>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, F, Output> Parser<'a, Output> for F
|
impl<'a, F, Output> Parser<'a, Output> for F
|
||||||
where
|
where
|
||||||
F: Fn(
|
F: Fn(&'a Bump, &'a mut State<'a>, Attempting) -> ParseResult<'a, Output>,
|
||||||
&'a Bump,
|
|
||||||
&'a State<'a>,
|
|
||||||
&'a mut Vec<'a, Located<Problem>>,
|
|
||||||
Attempting,
|
|
||||||
) -> ParseResult<'a, Output>,
|
|
||||||
{
|
{
|
||||||
fn parse(
|
fn parse(
|
||||||
&self,
|
&self,
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
state: &'a State<'a>,
|
state: &'a mut State<'a>,
|
||||||
problems: &'a mut Problems<'a>,
|
|
||||||
attempting: Attempting,
|
attempting: Attempting,
|
||||||
) -> ParseResult<'a, Output> {
|
) -> ParseResult<'a, Output> {
|
||||||
self(arena, state, problems, attempting)
|
self(arena, state, attempting)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -212,9 +223,9 @@ where
|
||||||
P: Parser<'a, Before>,
|
P: Parser<'a, Before>,
|
||||||
F: Fn(Before) -> After,
|
F: Fn(Before) -> After,
|
||||||
{
|
{
|
||||||
move |arena, state, problems, attempting| {
|
move |arena, state, attempting| {
|
||||||
parser
|
parser
|
||||||
.parse(arena, state, problems, attempting)
|
.parse(arena, state, attempting)
|
||||||
.map(|(next_state, output)| (next_state, transform(output)))
|
.map(|(next_state, output)| (next_state, transform(output)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -223,7 +234,7 @@ fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val
|
||||||
where
|
where
|
||||||
P: Parser<'a, Val>,
|
P: Parser<'a, Val>,
|
||||||
{
|
{
|
||||||
move |arena, state, problems, _| parser.parse(arena, state, problems, attempting)
|
move |arena, state, _| parser.parse(arena, state, attempting)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A keyword with no newlines in it.
|
/// A keyword with no newlines in it.
|
||||||
|
@ -232,7 +243,7 @@ fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> {
|
||||||
// in the state, only the column.
|
// in the state, only the column.
|
||||||
debug_assert!(!kw.contains("\n"));
|
debug_assert!(!kw.contains("\n"));
|
||||||
|
|
||||||
move |_arena: &'a Bump, state: &'a State<'a>, _problems, attempting| {
|
move |_arena: &'a Bump, state: &'a mut State<'a>, attempting| {
|
||||||
let input = state.input;
|
let input = state.input;
|
||||||
|
|
||||||
match input.get(0..kw.len()) {
|
match input.get(0..kw.len()) {
|
||||||
|
@ -259,8 +270,8 @@ where
|
||||||
P: Parser<'a, A>,
|
P: Parser<'a, A>,
|
||||||
F: Fn(&A) -> bool,
|
F: Fn(&A) -> bool,
|
||||||
{
|
{
|
||||||
move |arena: &'a Bump, state: &'a State<'a>, problems, attempting| {
|
move |arena: &'a Bump, state: &'a mut State<'a>, attempting| {
|
||||||
if let Ok((next_state, output)) = parser.parse(arena, state, problems, attempting) {
|
if let Ok((next_state, output)) = parser.parse(arena, state, attempting) {
|
||||||
if predicate(&output) {
|
if predicate(&output) {
|
||||||
return Ok((next_state, output));
|
return Ok((next_state, output));
|
||||||
}
|
}
|
||||||
|
@ -272,8 +283,7 @@ where
|
||||||
|
|
||||||
fn any<'a>(
|
fn any<'a>(
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
state: &'a State<'a>,
|
state: &'a mut State<'a>,
|
||||||
_problems: &'a mut Problems<'a>,
|
|
||||||
attempting: Attempting,
|
attempting: Attempting,
|
||||||
) -> ParseResult<'a, char> {
|
) -> ParseResult<'a, char> {
|
||||||
let input = state.input;
|
let input = state.input;
|
||||||
|
@ -304,12 +314,13 @@ fn whitespace<'a>() -> impl Parser<'a, char> {
|
||||||
|
|
||||||
/// What we're currently attempting to parse, e.g.
|
/// What we're currently attempting to parse, e.g.
|
||||||
/// "currently attempting to parse a list." This helps error messages!
|
/// "currently attempting to parse a list." This helps error messages!
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum Attempting {
|
pub enum Attempting {
|
||||||
List,
|
List,
|
||||||
Keyword,
|
Keyword,
|
||||||
StringLiteral,
|
StringLiteral,
|
||||||
EscapedUnicodeChar,
|
EscapedUnicodeChar,
|
||||||
|
Expression,
|
||||||
}
|
}
|
||||||
|
|
||||||
// fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> Expr {
|
// fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> Expr {
|
||||||
|
@ -367,8 +378,12 @@ pub enum Attempting {
|
||||||
// }))
|
// }))
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
|
string_literal()
|
||||||
|
}
|
||||||
|
|
||||||
fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
move |arena: &'a Bump, state: &'a State<'a>, problems: &'a mut Problems<'a>, attempting| {
|
move |arena: &'a Bump, state: &'a mut State<'a>, attempting| {
|
||||||
let mut chars = state.input.chars();
|
let mut chars = state.input.chars();
|
||||||
|
|
||||||
// String literals must start with a quote.
|
// String literals must start with a quote.
|
||||||
|
@ -403,9 +418,7 @@ fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
Some('t') => buf.push('\t'),
|
Some('t') => buf.push('\t'),
|
||||||
Some('n') => buf.push('\n'),
|
Some('n') => buf.push('\n'),
|
||||||
Some('r') => buf.push('\r'),
|
Some('r') => buf.push('\r'),
|
||||||
Some('u') => {
|
Some('u') => handle_escaped_unicode(arena, state, &mut chars, &mut buf),
|
||||||
handle_escaped_unicode(arena, state, &mut chars, &mut buf, problems)
|
|
||||||
}
|
|
||||||
Some('(') => panic!("TODO handle string interpolation"),
|
Some('(') => panic!("TODO handle string interpolation"),
|
||||||
Some(unsupported) => {
|
Some(unsupported) => {
|
||||||
// TODO don't bail out here! Instead, parse successfully
|
// TODO don't bail out here! Instead, parse successfully
|
||||||
|
@ -448,7 +461,7 @@ fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum Problem {
|
pub enum Problem {
|
||||||
/// TODO Invalid hex code - Unicode code points must be specified using hexadecimal characters (the numbers 0-9 and letters A-F)
|
/// TODO Invalid hex code - Unicode code points must be specified using hexadecimal characters (the numbers 0-9 and letters A-F)
|
||||||
NonHexCharsInUnicodeCodePoint,
|
NonHexCharsInUnicodeCodePoint,
|
||||||
|
@ -468,10 +481,10 @@ fn is_ascii_number(ch: char) -> bool {
|
||||||
|
|
||||||
fn escaped_unicode_problem<'a>(
|
fn escaped_unicode_problem<'a>(
|
||||||
problem: Problem,
|
problem: Problem,
|
||||||
state: &'a State<'a>,
|
state: &'a mut State<'a>,
|
||||||
buf_len: usize,
|
buf_len: usize,
|
||||||
hex_str_len: usize,
|
hex_str_len: usize,
|
||||||
) -> Located<Problem> {
|
) {
|
||||||
let start_line = state.line;
|
let start_line = state.line;
|
||||||
let start_col = state.column + buf_len as u32;
|
let start_col = state.column + buf_len as u32;
|
||||||
let end_line = start_line;
|
let end_line = start_line;
|
||||||
|
@ -485,18 +498,17 @@ fn escaped_unicode_problem<'a>(
|
||||||
end_col,
|
end_col,
|
||||||
};
|
};
|
||||||
|
|
||||||
Located {
|
state.problems.push(Located {
|
||||||
region,
|
region,
|
||||||
value: problem,
|
value: problem,
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_escaped_unicode<'a, I>(
|
fn handle_escaped_unicode<'a, I>(
|
||||||
arena: &'a Bump,
|
arena: &'a Bump,
|
||||||
state: &'a State<'a>,
|
state: &'a mut State<'a>,
|
||||||
chars: &mut I,
|
chars: &mut I,
|
||||||
buf: &mut String<'a>,
|
buf: &mut String<'a>,
|
||||||
problems: &mut Problems<'a>,
|
|
||||||
) where
|
) where
|
||||||
I: Iterator<Item = char>,
|
I: Iterator<Item = char>,
|
||||||
{
|
{
|
||||||
|
@ -504,14 +516,12 @@ fn handle_escaped_unicode<'a, I>(
|
||||||
// so we should always see a '{' next.
|
// so we should always see a '{' next.
|
||||||
if chars.next() != Some('{') {
|
if chars.next() != Some('{') {
|
||||||
// This is not a blocker. Keep parsing.
|
// This is not a blocker. Keep parsing.
|
||||||
let prob = escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
Problem::MalformedEscapedUnicode,
|
Problem::MalformedEscapedUnicode,
|
||||||
state,
|
state,
|
||||||
buf.len(),
|
buf.len(),
|
||||||
2, // So far we've parsed `\u`
|
2, // So far we've parsed `\u`
|
||||||
);
|
);
|
||||||
|
|
||||||
problems.push(prob);
|
|
||||||
} else {
|
} else {
|
||||||
// Stores the accumulated unicode digits
|
// Stores the accumulated unicode digits
|
||||||
let mut hex_str = String::new_in(arena);
|
let mut hex_str = String::new_in(arena);
|
||||||
|
@ -526,41 +536,35 @@ fn handle_escaped_unicode<'a, I>(
|
||||||
match u32::from_str_radix(&hex_str, 16) {
|
match u32::from_str_radix(&hex_str, 16) {
|
||||||
Ok(code_pt) => {
|
Ok(code_pt) => {
|
||||||
if code_pt > 0x10FFFF {
|
if code_pt > 0x10FFFF {
|
||||||
let prob = escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
Problem::UnicodeCodePointTooLarge,
|
Problem::UnicodeCodePointTooLarge,
|
||||||
state,
|
state,
|
||||||
buf.len(),
|
buf.len(),
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
|
||||||
problems.push(prob);
|
|
||||||
} else {
|
} else {
|
||||||
// If it all checked out, add it to
|
// If it all checked out, add it to
|
||||||
// the main buffer.
|
// the main buffer.
|
||||||
match char::from_u32(code_pt) {
|
match char::from_u32(code_pt) {
|
||||||
Some(ch) => buf.push(ch),
|
Some(ch) => buf.push(ch),
|
||||||
None => {
|
None => {
|
||||||
let prob = escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
Problem::InvalidUnicodeCodePoint,
|
Problem::InvalidUnicodeCodePoint,
|
||||||
state,
|
state,
|
||||||
buf.len(),
|
buf.len(),
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
|
||||||
problems.push(prob);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
let prob = escaped_unicode_problem(
|
escaped_unicode_problem(
|
||||||
Problem::NonHexCharsInUnicodeCodePoint,
|
Problem::NonHexCharsInUnicodeCodePoint,
|
||||||
state,
|
state,
|
||||||
buf.len(),
|
buf.len(),
|
||||||
hex_str.len(),
|
hex_str.len(),
|
||||||
);
|
);
|
||||||
|
|
||||||
problems.push(prob);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
58
tests/test_parser.rs
Normal file
58
tests/test_parser.rs
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
#[macro_use]
|
||||||
|
extern crate pretty_assertions;
|
||||||
|
#[macro_use]
|
||||||
|
extern crate indoc;
|
||||||
|
extern crate bumpalo;
|
||||||
|
extern crate combine; // OBSOLETE
|
||||||
|
extern crate roc;
|
||||||
|
|
||||||
|
mod helpers;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test_parser {
|
||||||
|
use bumpalo::Bump;
|
||||||
|
use roc::parser::Expr::{self, *};
|
||||||
|
use roc::parser::{Attempting, Parser, Problem, State};
|
||||||
|
|
||||||
|
fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
|
||||||
|
assert_parses_to_problems(input, expected_expr, Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_parses_to_problems<'a>(
|
||||||
|
input: &'a str,
|
||||||
|
expected_expr: Expr<'a>,
|
||||||
|
expected_problems: Vec<Problem>,
|
||||||
|
) {
|
||||||
|
let state = State::from_input(&input);
|
||||||
|
let arena = Bump::new();
|
||||||
|
let mut problems = bumpalo::collections::vec::Vec::new_in(&arena);
|
||||||
|
let attempting = Attempting::Expression;
|
||||||
|
let parser = roc::parser::expr();
|
||||||
|
let answer = parser.parse(&arena, &state, &mut problems, attempting);
|
||||||
|
let actual = answer
|
||||||
|
.map(|(_, expr)| expr)
|
||||||
|
.map_err(|(_, attempting)| attempting);
|
||||||
|
|
||||||
|
let mut actual_problems: Vec<Problem> = Vec::new();
|
||||||
|
|
||||||
|
for loc_problem in problems {
|
||||||
|
actual_problems.push(loc_problem.value);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_eq!(expected_problems, actual_problems);
|
||||||
|
|
||||||
|
assert_eq!(Ok(expected_expr), actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty_list() {
|
||||||
|
assert_parses_to(
|
||||||
|
indoc!(
|
||||||
|
r#"
|
||||||
|
""
|
||||||
|
"#
|
||||||
|
),
|
||||||
|
EmptyStr,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue