checkpoint 2

This commit is contained in:
Folkert 2021-02-26 20:41:42 +01:00
parent 5ec0103996
commit 98a3d42eff
5 changed files with 120 additions and 48 deletions

View file

@ -2224,17 +2224,14 @@ fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>, SyntaxError<
fn string_literal<'a>() -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> { fn string_literal<'a>() -> impl Parser<'a, Expr<'a>, SyntaxError<'a>> {
specialize( specialize(
|_, r, c| SyntaxError::Expr(EExpr::Str(EString::EndlessSingle, r, c)), |e, r, c| SyntaxError::Expr(EExpr::Str(e, r, c)),
map!(crate::string_literal::parse(), Expr::Str), map!(crate::string_literal::parse(), Expr::Str),
) )
} }
#[allow(dead_code)] #[allow(dead_code)]
fn string_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> { fn string_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
specialize( map!(crate::string_literal::parse(), Expr::Str)
|_, _, _| EString::EndlessSingle,
map!(crate::string_literal::parse(), Expr::Str),
)
} }
#[allow(dead_code)] #[allow(dead_code)]

View file

@ -1,9 +1,12 @@
use crate::ast::{CommentOrNewline, Spaceable, StrLiteral, TypeAnnotation};
use crate::blankspace::space0; use crate::blankspace::space0;
use crate::ident::lowercase_ident; use crate::ident::lowercase_ident;
use crate::module::package_name; use crate::module::package_name;
use crate::parser::{ascii_char, optional, Either, Parser, Progress::*, State, SyntaxError}; use crate::parser::{ascii_char, optional, Either, Parser, Progress::*, State, SyntaxError};
use crate::string_literal; use crate::string_literal;
use crate::{
ast::{CommentOrNewline, Spaceable, StrLiteral, TypeAnnotation},
parser::specialize,
};
use bumpalo::collections::Vec; use bumpalo::collections::Vec;
use inlinable_string::InlinableString; use inlinable_string::InlinableString;
use roc_region::all::Loc; use roc_region::all::Loc;
@ -272,7 +275,10 @@ pub fn package_entry<'a>() -> impl Parser<'a, PackageEntry<'a>, SyntaxError<'a>>
pub fn package_or_path<'a>() -> impl Parser<'a, PackageOrPath<'a>, SyntaxError<'a>> { pub fn package_or_path<'a>() -> impl Parser<'a, PackageOrPath<'a>, SyntaxError<'a>> {
map!( map!(
either!( either!(
string_literal::parse(), specialize(
|e, r, c| SyntaxError::Expr(crate::parser::EExpr::Str(e, r, c)),
string_literal::parse()
),
and!( and!(
package_name(), package_name(),
skip_first!(one_or_more!(ascii_char(b' ')), package_version()) skip_first!(one_or_more!(ascii_char(b' ')), package_version())

View file

@ -189,7 +189,13 @@ pub fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>, SyntaxError<'a>> {
and!( and!(
skip_first!( skip_first!(
ascii_string("app"), ascii_string("app"),
and!(space1(1), loc!(string_literal::parse())) and!(
space1(1),
loc!(crate::parser::specialize(
|e, r, c| SyntaxError::Expr(crate::parser::EExpr::Str(e, r, c)),
string_literal::parse()
))
)
), ),
and!( and!(
optional(packages()), optional(packages()),

View file

@ -420,9 +420,14 @@ pub enum Number {
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum EString<'a> { pub enum EString<'a> {
Open(Row, Col),
End, End,
LineTooLong,
EndlessSingle, CodePointOpen(Row, Col),
CodePointEnd(Row, Col),
Space(BadInputError, Row, Col),
EndlessSingle(Row, Col),
EndlessMulti, EndlessMulti,
StringEscape(Escape), StringEscape(Escape),
Format(&'a SyntaxError<'a>), Format(&'a SyntaxError<'a>),

View file

@ -1,33 +1,72 @@
use crate::ast::{Attempting, EscapedChar, StrLiteral, StrSegment}; use crate::ast::{EscapedChar, StrLiteral, StrSegment};
use crate::expr; use crate::expr;
use crate::parser::Progress::*; use crate::parser::Progress::*;
use crate::parser::{ use crate::parser::{
allocated, ascii_char, ascii_hex_digits, loc, parse_utf8, unexpected, unexpected_eof, allocated, ascii_char, loc, parse_utf8, specialize_ref, unexpected_eof, word1, BadInputError,
ParseResult, Parser, State, SyntaxError, EString, Escape, ParseResult, Parser, State, SyntaxError,
}; };
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> { /// One or more ASCII hex digits. (Useful when parsing unicode escape codes,
/// which must consist entirely of ASCII hex digits.)
fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
move |arena, state: State<'a>| {
// Measure the leading run of ASCII hex digits in the remaining input.
let digit_count = state
.bytes
.iter()
.take_while(|byte| byte.is_ascii_hexdigit())
.count();

// Two situations are reported as a failure without progress:
//   * the run is empty (no hex digits at all), or
//   * the run reaches the end of the input, i.e. no non-digit byte
//     terminates it (this also covers completely empty input).
if digit_count == 0 || digit_count == state.bytes.len() {
return Err((
NoProgress,
EString::CodePointEnd(state.line, state.column),
state,
));
}

// Copy the digits into an arena-allocated string.
let mut digits = bumpalo::collections::String::new_in(arena);
for &byte in state.bytes[..digit_count].iter() {
digits.push(byte as char);
}

// Consume exactly the digits; the terminating byte is left for the caller.
let state = state.advance_without_indenting_ee(digit_count, |row, col| {
EString::Space(BadInputError::LineTooLong, row, col)
})?;

Ok((MadeProgress, digits.into_bump_str(), state))
}
}
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
use StrLiteral::*; use StrLiteral::*;
move |arena: &'a Bump, mut state: State<'a>| { move |arena: &'a Bump, mut state: State<'a>| {
let mut bytes = state.bytes.iter(); let is_multiline;
// String literals must start with a quote. let mut bytes;
// If this doesn't, it must not be a string literal!
match bytes.next() { if state.bytes.starts_with(b"\"\"\"") {
Some(&byte) => { // we will be parsing a multi-string
if byte != b'"' { is_multiline = true;
return Err(unexpected(0, Attempting::StrLiteral, state)); bytes = state.bytes[3..].iter()
} } else if state.bytes.starts_with(b"\"") {
} // we will be parsing a single-string
None => { is_multiline = true;
return Err(unexpected_eof(arena, state, 0)); bytes = state.bytes[1..].iter()
} } else {
return Err((NoProgress, EString::Open(state.line, state.column), state));
} }
// String literals must start with a quote.
// If this doesn't, it must not be a string literal!
// Advance past the opening quotation mark. // Advance past the opening quotation mark.
state = state.advance_without_indenting(1)?; state = state.advance_without_indenting_ee(1, |r, c| {
EString::Space(BadInputError::LineTooLong, r, c)
})?;
// At the parsing stage we keep the entire raw string, because the formatter // At the parsing stage we keep the entire raw string, because the formatter
// needs the raw string. (For example, so it can "remember" whether you // needs the raw string. (For example, so it can "remember" whether you
@ -38,13 +77,21 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
let mut segment_parsed_bytes = 0; let mut segment_parsed_bytes = 0;
let mut segments = Vec::new_in(arena); let mut segments = Vec::new_in(arena);
// Advance `$st` by `$count` bytes without touching indentation. A failure
// from the underlying call is reported as
// `EString::Space(BadInputError::LineTooLong, ..)` at the row/column passed
// to the error-building closure. Evaluates to a `Result`, so call sites
// append `?`.
macro_rules! advance_state {
($st:expr, $count:expr) => {
$st.advance_without_indenting_ee($count, |row, col| {
EString::Space(BadInputError::LineTooLong, row, col)
})
};
}
macro_rules! escaped_char { macro_rules! escaped_char {
($ch:expr) => { ($ch:expr) => {
// Record the escaped char. // Record the escaped char.
segments.push(StrSegment::EscapedChar($ch)); segments.push(StrSegment::EscapedChar($ch));
// Advance past the segment we just added // Advance past the segment we just added
state = state.advance_without_indenting(segment_parsed_bytes)?; state = advance_state!(state, segment_parsed_bytes)?;
// Reset the segment // Reset the segment
segment_parsed_bytes = 0; segment_parsed_bytes = 0;
@ -63,12 +110,16 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
match parse_utf8(string_bytes) { match parse_utf8(string_bytes) {
Ok(string) => { Ok(string) => {
state = state.advance_without_indenting(string.len())?; state = advance_state!(state, string.len())?;
segments.push($transform(string)); segments.push($transform(string));
} }
Err(reason) => { Err(reason) => {
return state.fail(arena, MadeProgress, reason); return Err((
MadeProgress,
EString::Space(BadInputError::BadUtf8, state.line, state.column),
state,
));
} }
} }
} }
@ -98,14 +149,15 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
// If the very first three chars were all `"`, // If the very first three chars were all `"`,
// then this literal begins with `"""` // then this literal begins with `"""`
// and is a block string. // and is a block string.
return parse_block_string(arena, state, &mut bytes); // return parse_block_string(arena, state, &mut bytes);
todo!()
} }
_ => { _ => {
// Advance 1 for the close quote // Advance 1 for the close quote
return Ok(( return Ok((
MadeProgress, MadeProgress,
PlainLine(""), PlainLine(""),
state.advance_without_indenting(1)?, advance_state!(state, 1)?,
)); ));
} }
} }
@ -128,7 +180,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
}; };
// Advance the state 1 to account for the closing `"` // Advance the state 1 to account for the closing `"`
return Ok((MadeProgress, expr, state.advance_without_indenting(1)?)); return Ok((MadeProgress, expr, advance_state!(state, 1)?));
}; };
} }
b'\n' => { b'\n' => {
@ -137,9 +189,9 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
// all remaining chars. This will mask all other errors, but // all remaining chars. This will mask all other errors, but
// it should make it easiest to debug; the file will be a giant // it should make it easiest to debug; the file will be a giant
// error starting from where the open quote appeared. // error starting from where the open quote appeared.
return Err(unexpected( return Err((
state.bytes.len() - 1, MadeProgress,
Attempting::StrLiteral, EString::EndlessSingle(state.line, state.column),
state, state,
)); ));
} }
@ -158,7 +210,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
match bytes.next() { match bytes.next() {
Some(b'(') => { Some(b'(') => {
// Advance past the `\(` before using the expr parser // Advance past the `\(` before using the expr parser
state = state.advance_without_indenting(2)?; state = advance_state!(state, 2)?;
let original_byte_count = state.bytes.len(); let original_byte_count = state.bytes.len();
@ -166,8 +218,10 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
// Parse an arbitrary expression, then give a // Parse an arbitrary expression, then give a
// canonicalization error if that expression variant // canonicalization error if that expression variant
// is not allowed inside a string interpolation. // is not allowed inside a string interpolation.
let (_progress, loc_expr, new_state) = let (_progress, loc_expr, new_state) = specialize_ref(
skip_second!(loc(allocated(expr::expr(0))), ascii_char(b')')) |e, _, _| EString::Format(e),
skip_second!(loc(allocated(expr::expr(0))), ascii_char(b')')),
)
.parse(arena, state)?; .parse(arena, state)?;
// Advance the iterator past the expr we just parsed. // Advance the iterator past the expr we just parsed.
@ -183,7 +237,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
} }
Some(b'u') => { Some(b'u') => {
// Advance past the `\u` before using the expr parser // Advance past the `\u` before using the expr parser
state = state.advance_without_indenting(2)?; state = advance_state!(state, 2)?;
let original_byte_count = state.bytes.len(); let original_byte_count = state.bytes.len();
@ -191,9 +245,9 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
// give a canonicalization error if the digits form // give a canonicalization error if the digits form
// an invalid unicode code point. // an invalid unicode code point.
let (_progress, loc_digits, new_state) = between!( let (_progress, loc_digits, new_state) = between!(
ascii_char(b'('), word1(b'(', EString::CodePointOpen),
loc(ascii_hex_digits()), loc(ascii_hex_digits()),
ascii_char(b')') word1(b')', EString::CodePointEnd)
) )
.parse(arena, state)?; .parse(arena, state)?;
@ -227,9 +281,9 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
// Invalid escape! A backslash must be followed // Invalid escape! A backslash must be followed
// by either an open paren or else one of the // by either an open paren or else one of the
// escapable characters (\n, \t, \", \\, etc) // escapable characters (\n, \t, \", \\, etc)
return Err(unexpected( return Err((
state.bytes.len() - 1, MadeProgress,
Attempting::StrLiteral, EString::StringEscape(Escape::EscapeUnknown),
state, state,
)); ));
} }
@ -242,7 +296,11 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, SyntaxError<'a>> {
} }
// We ran out of characters before finding a closed quote // We ran out of characters before finding a closed quote
Err(unexpected_eof(arena, state.clone(), state.bytes.len())) Err((
MadeProgress,
EString::EndlessSingle(state.line, state.column),
state,
))
} }
} }