Get things compiling

This commit is contained in:
Richard Feldman 2020-08-26 21:12:44 -04:00
parent 70bef827a7
commit f35e43768a
18 changed files with 541 additions and 253 deletions

View file

@ -84,6 +84,26 @@ pub struct WhenPattern<'a> {
pub guard: Option<Loc<Expr<'a>>>,
}
/// One piece of a string literal that contains escapes.
///
/// A literal with escapes is stored as a sequence of these segments, each
/// borrowing from (or describing part of) the original source text.
#[derive(Clone, Debug, PartialEq)]
pub enum StrSegment<'a> {
/// A run of ordinary text between escapes.
Plaintext(&'a str), // e.g. "foo"
/// The contents of a unicode escape, wrapped in `Loc`.
Unicode(Loc<&'a str>), // e.g. "00A0" in "\u(00A0)"
/// A single character written as a backslash escape.
EscapedChar(char), // e.g. '\n' in "Hello!\n"
/// An interpolated identifier.
Interpolated {
// e.g. "App.version" in "Version: \(App.version)"
// NOTE(review): presumably "App" in the example above — confirm.
module_name: &'a str,
// NOTE(review): presumably "version" in the example above — confirm.
ident: &'a str,
// NOTE(review): presumably the source region of the interpolation — confirm.
region: Region,
},
}
/// A parsed string literal.
///
/// The variant records whether the literal was a single line with no
/// escapes, a single line with escapes, or a block string.
#[derive(Clone, Debug, PartialEq)]
pub enum StrLiteral<'a> {
/// A single-line string that needed no segmentation (including the
/// empty string `""`), stored as one slice of text.
PlainLine(&'a str),
/// A single-line string that was split into multiple segments because
/// it contained at least one escape.
LineWithEscapes(&'a [StrSegment<'a>]),
/// A block string (one that begins with `"""`).
// NOTE(review): presumably one inner slice of segments per line — confirm.
Block(&'a [&'a [StrSegment<'a>]]),
}
/// A parsed expression. This uses lifetimes extensively for two reasons:
///
/// 1. It uses Bump::alloc for all allocations, which returns a reference.
@ -105,8 +125,7 @@ pub enum Expr<'a> {
},
// String Literals
Str(&'a str),
BlockStr(&'a [&'a str]),
Str(StrLiteral<'a>), // string without escapes in it
/// Look up exactly one field on a record, e.g. (expr).foo.
Access(&'a Expr<'a>, &'a str),
/// e.g. `.foo`
@ -336,8 +355,7 @@ pub enum Pattern<'a> {
is_negative: bool,
},
FloatLiteral(&'a str),
StrLiteral(&'a str),
BlockStrLiteral(&'a [&'a str]),
StrLiteral(StrLiteral<'a>),
Underscore,
// Space
@ -455,7 +473,6 @@ impl<'a> Pattern<'a> {
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
(FloatLiteral(x), FloatLiteral(y)) => x == y,
(StrLiteral(x), StrLiteral(y)) => x == y,
(BlockStrLiteral(x), BlockStrLiteral(y)) => x == y,
(Underscore, Underscore) => true,
// Space
@ -584,7 +601,7 @@ impl<'a> Spaceable<'a> for Def<'a> {
pub enum Attempting {
List,
Keyword,
StringLiteral,
StrLiteral,
RecordLiteral,
RecordFieldLabel,
InterpolatedString,

View file

@ -300,12 +300,8 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
base: *base,
is_negative: *is_negative,
}),
Expr::Str(string) => Ok(Pattern::StrLiteral(string)),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
// These would not have parsed as patterns
Expr::BlockStr(_)
| Expr::AccessorFunction(_)
Expr::AccessorFunction(_)
| Expr::Access(_, _)
| Expr::List(_)
| Expr::Closure(_, _)
@ -322,6 +318,9 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
attempting: Attempting::Def,
reason: FailReason::InvalidPattern,
}),
Expr::Str(string) => Ok(Pattern::StrLiteral(string.clone())),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
}
}
@ -580,11 +579,7 @@ fn annotation_or_alias<'a>(
QualifiedIdentifier { .. } => {
panic!("TODO gracefully handle trying to annotate a qualified identifier, e.g. `Foo.bar : ...`");
}
NumLiteral(_)
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_) => {
NumLiteral(_) | NonBase10Literal { .. } | FloatLiteral(_) | StrLiteral(_) => {
panic!("TODO gracefully handle trying to annotate a litera");
}
Underscore => {
@ -916,10 +911,7 @@ fn number_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
}
/// Parser for a string literal in pattern position.
///
/// Runs `crate::string_literal::parse()` and wraps its output in a
/// `Pattern` string-literal variant.
fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map!(crate::string_literal::parse(), |result| match result {
crate::string_literal::StringLiteral::Line(string) => Pattern::StrLiteral(string),
crate::string_literal::StringLiteral::Block(lines) => Pattern::BlockStrLiteral(lines),
})
map!(crate::string_literal::parse(), Pattern::StrLiteral)
}
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
@ -1789,8 +1781,5 @@ pub fn global_tag<'a>() -> impl Parser<'a, &'a str> {
}
/// Parser for a string literal in expression position.
///
/// Runs `crate::string_literal::parse()` and wraps its output in an
/// `Expr` string variant.
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
map!(crate::string_literal::parse(), |result| match result {
crate::string_literal::StringLiteral::Line(string) => Expr::Str(string),
crate::string_literal::StringLiteral::Block(lines) => Expr::BlockStr(lines),
})
map!(crate::string_literal::parse(), Expr::Str)
}

View file

@ -1,71 +1,110 @@
use crate::ast::Attempting;
use crate::ast::{Attempting, StrLiteral, StrSegment};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
/// The raw result of parsing a string literal, with no escape processing.
pub enum StringLiteral<'a> {
/// A single-line string: the text between the opening and closing `"`.
Line(&'a str),
/// A block string (one that begins with `"""`), stored as a slice of lines.
Block(&'a [&'a str]),
}
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> {
use StrLiteral::*;
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
move |arena: &'a Bump, state: State<'a>| {
let mut bytes = state.bytes.iter();
// String literals must start with a quote.
// If this doesn't, it must not be a string literal!
match bytes.next() {
Some(&byte) => {
if byte != b'"' {
return Err(unexpected(0, state, Attempting::StringLiteral));
return Err(unexpected(0, state, Attempting::StrLiteral));
}
}
None => {
return Err(unexpected_eof(0, Attempting::StringLiteral, state));
return Err(unexpected_eof(0, Attempting::StrLiteral, state));
}
}
// The current segment begins right after the opening quotation mark.
let mut cur_segment = &state.bytes[1..];
enum EscapeState {
None,
Unicode,
Interpolation,
}
// At the parsing stage we keep the entire raw string, because the formatter
// needs the raw string. (For example, so it can "remember" whether you
// wrote \u{...} or the actual unicode character itself.)
//
// Later, in canonicalization, we'll do things like resolving
// unicode escapes and string interpolation.
//
// Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1;
let mut prev_byte = b'"';
let mut total_parsed_chars = 1;
let mut segment_parsed_chars = 0;
let mut segments = Vec::new_in(arena);
let mut escape_state = EscapeState::None;
// pub enum StrSegment<'a> {
// Plaintext(&'a str), // e.g. "foo"
// Unicode(&'a str), // e.g. "00A0" in "\u(00A0)"
// Interpolated(&'a str), // e.g. "name" in "Hi, \(name)!"
// EscapedChar(char), // e.g. '\n' in "Hello!\n"
// }
while let Some(&byte) = bytes.next() {
parsed_chars += 1;
segment_parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!)
match byte {
b'"' if prev_byte != b'\\' => {
let (string, state) = if parsed_chars == 2 {
match bytes.next() {
Some(b'"') => {
// If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut bytes);
b'"' => {
// If we aren't escaping, then this is the end of the string!
if let EscapeState::None = escape_state {
let (literal, state) = if total_parsed_chars == 1 && segments.is_empty() {
match bytes.next() {
Some(b'"') => {
// If the very first three chars were all `"`,
// then this literal begins with `"""`
// and is a block string.
return parse_block_string(arena, state, &mut bytes);
}
_ => (PlainLine(""), state.advance_without_indenting(2)?),
}
_ => ("", state.advance_without_indenting(2)?),
}
} else {
// Subtract 1 from parsed_chars so we omit the closing `"`.
let string_bytes = &cur_segment[0..(segment_parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => {
total_parsed_chars += segment_parsed_chars;
let state =
state.advance_without_indenting(total_parsed_chars)?;
if segments.is_empty() {
// We only had one segment.
(StrLiteral::PlainLine(string), state)
} else {
// We had multiple segments! Parse the
// current one and add it to the list.
segments.push(StrSegment::Plaintext(string));
(LineWithEscapes(segments.into_bump_slice()), state)
}
}
Err(reason) => {
return state.fail(reason);
}
}
};
return Ok((literal, state));
} else {
// Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`.
let string_bytes = &state.bytes[1..(parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?),
Err(reason) => {
return state.fail(reason);
}
}
};
return Ok((StringLiteral::Line(string), state));
// We are escaping, so this is an error. (If it were an
// escaped single character like \" then we would have
// handled that scenario already.)
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
b'\n' => {
// This is a single-line string, which cannot have newlines!
@ -76,19 +115,90 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StringLiteral,
Attempting::StrLiteral,
));
}
b')' => {
// All escape sequences end in a close paren, so we don't
// need to pay for a conditional here. If it was an escape,
// then we want to set it to None, and if it wasn't an
// escape, then setting it from None to None is harmless!
// (And likely cheaper than a conditional.)
escape_state = EscapeState::None;
}
b'\\' => {
// This is the start of a new escape
if let EscapeState::None = escape_state {
match bytes.next() {
Some(b'(') => {
// This is an interpolated variable
escape_state = EscapeState::Interpolation;
todo!("Parse interpolated ident");
}
Some(b'u') => {
escape_state = EscapeState::Unicode;
// This is an escaped unicode character
todo!("Parse '(' and then parse escaped unicode character");
}
Some(ch @ b'\n') | Some(ch @ b'\t') | Some(ch @ b'\r')
| Some(ch @ b'"') | Some(ch @ b'\\') => {
// Record the current segment so we can begin a new one.
match parse_utf8(cur_segment) {
Ok(string) => {
segments.push(StrSegment::Plaintext(string));
}
Err(reason) => {
return state.fail(reason);
}
}
// Record the escaped char.
segments.push(StrSegment::EscapedChar(*ch as char));
// We're now done escaping.
escape_state = EscapeState::None;
// Advance past the segment we just added, and
// also past the escaped char we just added.
//
// +2 because we just parsed a backslash and
// one other char after it.
cur_segment = &cur_segment[(segment_parsed_chars + 2)..];
// Reset segment_parsed_chars to 0 because we're now
// parsing the beginning of a new segment.
segment_parsed_chars = 0;
}
_ => {
// Invalid escape! A backslash must be followed
// by either an open paren or else one of the
// escapable characters (\n, \t, \", \\, etc)
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
} else {
// Can't have a \ inside an escape!
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
_ => {
prev_byte = byte;
// All other characters need no special handling.
}
}
}
// We ran out of characters before finding a closed quote
Err(unexpected_eof(
parsed_chars,
Attempting::StringLiteral,
total_parsed_chars,
Attempting::StrLiteral,
state.clone(),
))
}
@ -98,7 +208,7 @@ fn parse_block_string<'a, I>(
arena: &'a Bump,
state: State<'a>,
bytes: &mut I,
) -> ParseResult<'a, StringLiteral<'a>>
) -> ParseResult<'a, StrLiteral<'a>>
where
I: Iterator<Item = &'a u8>,
{
@ -125,12 +235,13 @@ where
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
return match parse_utf8(line_bytes) {
Ok(line) => {
let state = state.advance_without_indenting(parsed_chars)?;
Ok(_line) => {
// let state = state.advance_without_indenting(parsed_chars)?;
lines.push(line);
// lines.push(line);
Ok((StringLiteral::Block(arena.alloc(lines)), state))
// Ok((StrLiteral::Block(lines.into_bump_slice()), state))
todo!("TODO finish making block strings accept escapes");
}
Err(reason) => state.fail(reason),
};
@ -164,8 +275,8 @@ where
// We ran out of characters before finding 3 closing quotes
Err(unexpected_eof(
parsed_chars,
// TODO custom BlockStringLiteral?
Attempting::StringLiteral,
// TODO custom BlockStrLiteral?
Attempting::StrLiteral,
state,
))
}