Record raw strings during parse step

2025-10-03 00:24:34 +00:00 · 2019-09-16 00:25:31 -04:00 · 2019-09-16 00:25:31 -04:00 · d54cf81f7b
commit d54cf81f7b
parent fa9e074488
40 changed files with 4111 additions and 7400 deletions
--- a/src/parse/ast.rs
+++ b/src/parse/ast.rs
@ -1,9 +1,8 @@
+use bumpalo::collections::vec::Vec;
 use operator::Operator;
-use parse::problems::Problem;
 use region::Loc;
 use std::fmt::{self, Display, Formatter};

-pub type Ident = str;
 pub type VariantName = str;

 /// A parsed expression. This uses lifetimes extensively for two reasons:
@ -23,50 +22,45 @@ pub type VariantName = str;
 #[derive(Clone, Debug, PartialEq)]
 pub enum Expr<'a> {
    // Number Literals
-    Int(i64),
-    Float(f64),
+    Float(&'a str),
+    Int(&'a str),
+    HexInt(&'a str),
+    OctalInt(&'a str),
+    BinaryInt(&'a str),

    // String Literals
    EmptyStr,
    Str(&'a str),
-    /// basically InterpolatedStr(Vec<(String, Loc<Expr>)>, String)
-    InterpolatedStr(&'a (&'a [(&'a str, Loc<Expr<'a>>)], &'a str)),
+    BlockStr(&'a [&'a str]),

    // List literals
    EmptyList,
-    List(&'a [Loc<Expr<'a>>]),
+    List(Vec<'a, Loc<Expr<'a>>>),
+    // // Lookups
+    // Var(&'a str),

-    // Lookups
-    Var(&'a Ident),
+    // // Pattern Matching
+    // Case(&'a (Loc<Expr<'a>>, [(Loc<Pattern<'a>>, Loc<Expr<'a>>)])),
+    // Closure(&'a (&'a [Loc<Pattern<'a>>], Loc<Expr<'a>>)),
+    // /// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
+    // Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),

-    // Pattern Matching
-    Case(&'a (Loc<Expr<'a>>, [(Loc<Pattern<'a>>, Loc<Expr<'a>>)])),
-    Closure(&'a (&'a [Loc<Pattern<'a>>], Loc<Expr<'a>>)),
-    /// basically Assign(Vec<(Loc<Pattern>, Loc<Expr>)>, Loc<Expr>)
-    Assign(&'a (&'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)], Loc<Expr<'a>>)),
-
-    // Application
-    Call(&'a (Loc<Expr<'a>>, [Loc<Expr<'a>>])),
-    ApplyVariant(&'a (&'a VariantName, [Loc<Expr<'a>>])),
-    Variant(&'a VariantName),
+    // // Application
+    // Call(&'a (Loc<Expr<'a>>, [Loc<Expr<'a>>])),
+    // ApplyVariant(&'a (&'a VariantName, [Loc<Expr<'a>>])),
+    // Variant(&'a VariantName),

    // Product Types
    EmptyRecord,
-
-    // Sugar
-    If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)),
+    // // Sugar
+    // If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)),
    Operator(&'a (Loc<Expr<'a>>, Loc<Operator>, Loc<Expr<'a>>)),
-
-    // Runtime errors
-    MalformedStr(Box<[Loc<Problem>]>),
-    MalformedInt(Problem),
-    MalformedFloat(Problem),
 }

 #[derive(Clone, Debug, PartialEq)]
 pub enum Pattern<'a> {
    // Identifier
-    Identifier(&'a Ident),
+    Identifier(&'a str),

    // Variant
    Variant(&'a VariantName),
@ -82,33 +76,35 @@ pub enum Pattern<'a> {

 #[test]
 fn expr_size() {
-    // The size of the Expr data structure should be exactly 3 machine words.
+    // The size of the Expr data structure should be exactly 5 machine words.
    // This test helps avoid regressions which accidentally increase its size!
-    //
-    // Worth noting that going up to 4 machine words is probably not a big deal;
-    // an 8-byte cache line will only fit 2 of these regardless.
    assert_eq!(
        std::mem::size_of::<Expr>(),
        // TODO [move this comment to an issue] We should be able to get this
        // down to 2, which would mean we could fit 4 of these nodes in a single
-        // 64-byte cache line instead of only being able to fit 2.
+        // 64-byte cache line instead of only being able to fit 1.
        //
        // Doing this would require, among other things:
        // 1. Making a str replacement where the length is stored as u32 instead of usize,
        //    to leave room for the tagged union's u8 tag.
        //    (Alternatively could store it as (&'a &'a str), but ew.)
-        // 2. Figuring out why &'a (Foo, Bar) by default takes up 24 bytes in Rust.
+        // 2. Similarly, making a slice replacement like that str replacement, and
+        //    also where it doesn't share the bytes with anything else - so its
+        //    elements can be consumed without having to clone them (unlike a slice).
+        //    That's the only reason we're using Vec right now instead of slices -
+        //    if we used slices, we'd have to clone their elements during canonicalization
+        //    just to iterate over them and canonicalize them normally.
+        // 3. Figuring out why (&'a (Foo, Bar)) by default takes up 24 bytes in Rust.
        //    I assume it's because the struct is being stored inline instead of
        //    as a pointer, but in this case we actually do want the pointer!
        //    We want to have the lifetime and we want to avoid using the unsafe keyword,
        //    but we also want this to only store 1 pointer in the AST node.
        //    Hopefully there's a way!
        //
-        // It's also possible that going up to 4 machine words might yield even
-        // better performance, due to more data structures being inlinable,
-        // and therefore having fewer pointers to chase. This seems worth
-        // investigating as well.
-        std::mem::size_of::<usize>() * 3
+        // It's also possible that 4 machine words might yield better performance
+        // than 2, due to more data structures being inlinable, and therefore
+        // having fewer pointers to chase. This seems worth investigating as well.
+        std::mem::size_of::<usize>() * 5
    );
 }

@ -151,6 +147,7 @@ pub enum Attempting {
    List,
    Keyword,
    StringLiteral,
+    RecordLiteral,
    InterpolatedString,
    NumberLiteral,
    UnicodeEscape,
@ -165,7 +162,15 @@ impl<'a> Display for Expr<'a> {

        match self {
            EmptyStr => write!(f, "\"\""),
-            _ => panic!("TODO"),
+            Str(string) => write!(f, "\"{}\"", string),
+            BlockStr(lines) => write!(f, "\"\"\"{}\"\"\"", lines.join("\n")),
+            Int(string) => string.fmt(f),
+            Float(string) => string.fmt(f),
+            HexInt(string) => write!(f, "0x{}", string),
+            BinaryInt(string) => write!(f, "0b{}", string),
+            OctalInt(string) => write!(f, "0o{}", string),
+            EmptyRecord => write!(f, "{}", "{}"),
+            other => panic!("TODO implement Display for AST variant {:?}", other),
        }
    }
 }
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@ -1,18 +1,76 @@
 pub mod ast;
 pub mod ident;
+pub mod module;
 pub mod number_literal;
 pub mod parser;
 pub mod problems;
 pub mod string_literal;

+use bumpalo::Bump;
+use operator::Operator;
 use parse::ast::{Attempting, Expr};
 use parse::number_literal::number_literal;
-use parse::parser::{attempt, one_of2, Parser};
+use parse::parser::{
+    and, attempt, lazy, loc, map, map_with_arena, one_of3, optional, string, unexpected,
+    unexpected_eof, val, Parser, State,
+};
 use parse::string_literal::string_literal;

 pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
-    attempt(
-        Attempting::Expression,
-        one_of2(number_literal(), string_literal()),
+    map_with_arena(
+        and(
+            attempt(
+                Attempting::Expression,
+                loc(one_of3(
+                    record_literal(),
+                    number_literal(),
+                    string_literal(),
+                )),
+            ),
+            optional(and(loc(operator()), loc(val(Expr::Str("blah"))))),
+        ),
+        |arena, (loc_expr1, opt_operator)| match opt_operator {
+            Some((loc_op, loc_expr2)) => {
+                let tuple = arena.alloc((loc_expr1, loc_op, loc_expr2));
+
+                Expr::Operator(tuple)
+            }
+            None => loc_expr1.value,
+        },
    )
 }
+
+pub fn operator<'a>() -> impl Parser<'a, Operator> {
+    val(Operator::Plus)
+    // one_of3(
+    //     map(string("+"), |_| Operator::Plus),
+    //     map(string("-"), |_| Operator::Minus),
+    //     map(string("*"), |_| Operator::Star),
+    // )
+}
+
+pub fn record_literal<'a>() -> impl Parser<'a, Expr<'a>> {
+    move |_arena: &'a Bump, state: State<'a>| {
+        let mut chars = state.input.chars();
+
+        match chars.next() {
+            Some('{') => (),
+            Some(other_char) => {
+                return Err(unexpected(other_char, 0, state, Attempting::RecordLiteral));
+            }
+            None => {
+                return Err(unexpected_eof(0, Attempting::RecordLiteral, state));
+            }
+        }
+
+        match chars.next() {
+            Some('}') => {
+                let next_state = state.advance_without_indenting(2)?;
+
+                Ok((Expr::EmptyRecord, next_state))
+            }
+            Some(other_char) => Err(unexpected(other_char, 0, state, Attempting::RecordLiteral)),
+            None => Err(unexpected_eof(0, Attempting::RecordLiteral, state)),
+        }
+    }
+}
--- a/src/parse/module.rs
+++ b/src/parse/module.rs
@ -0,0 +1,16 @@
+use ident::Ident;
+use parse::ast::{Expr, Pattern};
+
+pub struct Module<'a> {
+    pub name: Ident,
+    pub exposes: Vec<Ident>,
+    pub uses: Vec<Ident>,
+    pub decls: Vec<Decl<'a>>,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum Decl<'a> {
+    Def(Pattern<'a>, Expr<'a>, Expr<'a>),
+    // TODO Alias
+    // TODO SumType
+}
--- a/src/parse/number_literal.rs
+++ b/src/parse/number_literal.rs
@ -1,19 +1,16 @@
-use bumpalo::collections::string::String;
-use bumpalo::Bump;
 use parse::ast::{Attempting, Expr};
 use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
-use parse::problems::Problem;
 use std::char;

 pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
-    move |arena: &'a Bump, state: State<'a>| {
+    move |_arena, state: State<'a>| {
        let mut chars = state.input.chars();

        match chars.next() {
            Some(first_ch) => {
                // Number literals must start with either a '-' or a digit.
                if first_ch == '-' || first_ch.is_ascii_digit() {
-                    parse_number_literal(first_ch, &mut chars, arena, state)
+                    parse_number_literal(first_ch, &mut chars, state)
                } else {
                    Err(unexpected(
                        first_ch,
@ -32,61 +29,69 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
 fn parse_number_literal<'a, I>(
    first_ch: char,
    chars: &mut I,
-    arena: &'a Bump,
    state: State<'a>,
 ) -> ParseResult<'a, Expr<'a>>
 where
    I: Iterator<Item = char>,
 {
-    let mut before_decimal = String::with_capacity_in(1, arena);
-    let mut after_decimal = String::new_in(arena);
-    let mut has_decimal_point = false;
-    let mut chars_skipped = 0;
+    use self::LiteralType::*;

-    // Put the first character into the buffer, even if all we've parsed so
-    // far is a minus sign.
-    //
-    // We have to let i64::parse handle the minus sign (if it's there), because
-    // otherwise if we ask it to parse i64::MIN.to_string() as a positive i64,
-    // it errors because that positive number doesn't fit in an i64!
-    before_decimal.push(first_ch);
+    let mut typ = Int;
+
+    // We already parsed 1 character (which may have been a minus sign).
+    let mut chars_parsed = 1;

    while let Some(next_ch) = chars.next() {
-        match next_ch {
-            digit if next_ch.is_ascii_digit() => {
-                if has_decimal_point {
-                    after_decimal.push(digit);
-                } else {
-                    before_decimal.push(digit);
-                }
-            }
-            '_' => {
-                // Underscores are allowed, and disregarded.
-                chars_skipped += 1;
-            }
-            '.' => {
-                if has_decimal_point {
-                    // You only get one decimal point!
-                    let len = before_decimal.len() + after_decimal.len() + chars_skipped;
+        chars_parsed += 1;

-                    return Err(unexpected('.', len, state, Attempting::NumberLiteral));
-                } else {
-                    chars_skipped += 1;
-                    has_decimal_point = true;
-                }
-            }
-            invalid_char => {
-                if before_decimal.is_empty() {
-                    // No digits! We likely parsed a minus sign that's actually an operator.
-                    let len = before_decimal.len() + after_decimal.len() + chars_skipped;
-                    return Err(unexpected(
-                        invalid_char,
-                        len,
-                        state,
-                        Attempting::NumberLiteral,
-                    ));
-                }
+        let err_unexpected = || {
+            Err(unexpected(
+                next_ch,
+                chars_parsed,
+                state.clone(),
+                Attempting::NumberLiteral,
+            ))
+        };

+        // Returns true iff so far we have parsed the given char and no other chars.
+        let so_far_parsed = |ch| chars_parsed == 2 && first_ch == ch;
+
+        // We don't support negative escaped ints (e.g. 0x01 is supported but -0x01 is not).
+        // If you want that, do something like (negate 0x01).
+        //
+        // I'm open to changing this policy (that is, allowing support for
+        // negative escaped ints), but it'll complicate parsing logic and seems
+        // nonessential, so I'm leaving it out for now.
+        if next_ch == '.' {
+            if typ == Float {
+                // You only get one decimal point!
+                return err_unexpected();
+            } else {
+                typ = Float;
+            }
+        } else if next_ch == 'x' {
+            if so_far_parsed('0') {
+                typ = Hex;
+            } else {
+                return err_unexpected();
+            }
+        } else if next_ch == 'b' {
+            if so_far_parsed('0') {
+                typ = Binary;
+            } else {
+                return err_unexpected();
+            }
+        } else if next_ch == 'o' {
+            if so_far_parsed('0') {
+                typ = Octal;
+            } else {
+                return err_unexpected();
+            }
+        } else if !next_ch.is_ascii_digit() && next_ch != '_' {
+            if so_far_parsed('-') {
+                // No digits! We likely parsed a minus sign that's actually an operator.
+                return err_unexpected();
+            } else {
                // We hit an invalid number literal character; we're done!
                break;
            }
@ -96,41 +101,25 @@ where
    // At this point we have a number, and will definitely succeed.
    // If the number is malformed (outside the supported range),
    // we'll succeed with an appropriate Expr which records that.
-    let expr = if has_decimal_point {
-        let mut f64_buf = String::with_capacity_in(
-            before_decimal.len()
-            // +1 for the decimal point itself
-            + 1
-            + after_decimal.len(),
-            arena,
-        );
-
-        f64_buf.push_str(&before_decimal);
-        f64_buf.push('.');
-        f64_buf.push_str(&after_decimal);
-
-        // TODO [convert this comment to an issue] - we can get better
-        // performance here by inlining string.parse() for the f64 case,
-        // since we've already done the work of validating that each char
-        // is a digit, plus we also already separately parsed the minus
-        // sign and dot.
-        match f64_buf.parse::<f64>() {
-            Ok(float) if float.is_finite() => Expr::Float(float),
-            _ => Expr::MalformedFloat(Problem::OutsideSupportedRange),
-        }
-    } else {
-        // TODO [convert this comment to an issue] - we can get better
-        // performance here by inlining string.parse() for the i64 case,
-        // since we've already done the work of validating that each char
-        // is a digit.
-        match before_decimal.parse::<i64>() {
-            Ok(int_val) => Expr::Int(int_val),
-            Err(_) => Expr::MalformedInt(Problem::OutsideSupportedRange),
-        }
+    let expr = match typ {
+        Int => Expr::Int(&state.input[0..chars_parsed]),
+        Float => Expr::Float(&state.input[0..chars_parsed]),
+        // For these we trim off the 0x/0o/0b part
+        Hex => Expr::HexInt(&state.input[2..chars_parsed - 1]),
+        Binary => Expr::BinaryInt(&state.input[2..chars_parsed - 1]),
+        Octal => Expr::OctalInt(&state.input[2..chars_parsed - 1]),
    };

-    let total_chars_parsed = before_decimal.len() + chars_skipped;
-    let state = state.advance_without_indenting(total_chars_parsed)?;
+    let next_state = state.advance_without_indenting(chars_parsed)?;

-    Ok((expr, state))
+    Ok((expr, next_state))
+}
+
+#[derive(Debug, PartialEq, Eq)]
+enum LiteralType {
+    Int,
+    Float,
+    Hex,
+    Octal,
+    Binary,
 }
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@ -1,7 +1,7 @@
 use bumpalo::collections::vec::Vec;
 use bumpalo::Bump;
 use parse::ast::Attempting;
-use region::Region;
+use region::{Located, Region};
 use std::char;

 // Strategy:
@ -190,6 +190,21 @@ pub trait Parser<'a, Output> {
    fn parse(&self, &'a Bump, State<'a>) -> ParseResult<'a, Output>;
 }

+pub struct BoxedParser<'a, Output> {
+    parser: &'a (dyn Parser<'a, Output> + 'a),
+}
+
+impl<'a, Output> BoxedParser<'a, Output> {
+    fn new<P>(arena: &'a Bump, parser: P) -> Self
+    where
+        P: Parser<'a, Output> + 'a,
+    {
+        BoxedParser {
+            parser: arena.alloc(parser),
+        }
+    }
+}
+
 impl<'a, F, Output> Parser<'a, Output> for F
 where
    F: Fn(&'a Bump, State<'a>) -> ParseResult<'a, Output>,
@ -199,6 +214,22 @@ where
    }
 }

+pub fn val<'a, Val>(value: Val) -> impl Parser<'a, Val>
+where
+    Val: Clone,
+{
+    move |_, state| Ok((value.clone(), state))
+}
+
+/// Needed for recursive parsers
+pub fn lazy<'a, F, P, Val>(get_parser: F) -> impl Parser<'a, Val>
+where
+    F: Fn() -> P,
+    P: Parser<'a, Val>,
+{
+    move |arena, state| get_parser().parse(arena, state)
+}
+
 pub fn map<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
 where
    P: Parser<'a, Before>,
@ -211,6 +242,18 @@ where
    }
 }

+pub fn map_with_arena<'a, P, F, Before, After>(parser: P, transform: F) -> impl Parser<'a, After>
+where
+    P: Parser<'a, Before>,
+    F: Fn(&'a Bump, Before) -> After,
+{
+    move |arena, state| {
+        parser
+            .parse(arena, state)
+            .map(|(output, next_state)| (transform(arena, output), next_state))
+    }
+}
+
 pub fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val>
 where
    P: Parser<'a, Val>,
@ -226,6 +269,32 @@ where
    }
 }

+pub fn loc<'a, P, Val>(parser: P) -> impl Parser<'a, Located<Val>>
+where
+    P: Parser<'a, Val>,
+{
+    move |arena, state: State<'a>| {
+        let start_col = state.column;
+        let start_line = state.line;
+
+        match parser.parse(arena, state) {
+            Ok((value, state)) => {
+                let end_col = state.column;
+                let end_line = state.line;
+                let region = Region {
+                    start_col,
+                    start_line,
+                    end_col,
+                    end_line,
+                };
+
+                Ok((Located { region, value }, state))
+            }
+            Err((fail, state)) => Err((fail, state)),
+        }
+    }
+}
+
 pub fn one_or_more<'a, P, A>(parser: P) -> impl Parser<'a, Vec<'a, A>>
 where
    P: Parser<'a, A>,
@ -317,6 +386,7 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
        let input = state.input;
        let len = string.len();

+        // TODO do this comparison in one SIMD instruction (on supported systems)
        match input.get(0..len) {
            Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len)?)),
            _ => Err(unexpected_eof(len, Attempting::Keyword, state)),
@ -378,6 +448,46 @@ where
 //     satisfies(any, |ch| ch.is_whitespace())
 // }

+pub fn and<'a, P1, P2, A, B>(p1: P1, p2: P2) -> impl Parser<'a, (A, B)>
+where
+    P1: Parser<'a, A>,
+    P2: Parser<'a, B>,
+{
+    move |arena: &'a Bump, state: State<'a>| {
+        let original_attempting = state.attempting;
+
+        match p1.parse(arena, state) {
+            Ok((out1, state)) => match p2.parse(arena, state) {
+                Ok((out2, state)) => Ok(((out1, out2), state)),
+                Err((fail, state)) => Err((
+                    Fail {
+                        attempting: original_attempting,
+                        ..fail
+                    },
+                    state,
+                )),
+            },
+            Err((fail, state)) => Err((
+                Fail {
+                    attempting: original_attempting,
+                    ..fail
+                },
+                state,
+            )),
+        }
+    }
+}
+
+pub fn optional<'a, P, T>(parser: P) -> impl Parser<'a, Option<T>>
+where
+    P: Parser<'a, T>,
+{
+    move |arena: &'a Bump, state: State<'a>| match parser.parse(arena, state) {
+        Ok((out1, state)) => Ok((Some(out1), state)),
+        Err((_, state)) => Ok((None, state)),
+    }
+}
+
 pub fn one_of2<'a, P1, P2, A>(p1: P1, p2: P2) -> impl Parser<'a, A>
 where
    P1: Parser<'a, A>,
--- a/src/parse/string_literal.rs
+++ b/src/parse/string_literal.rs
@ -1,18 +1,11 @@
-use bumpalo::collections::string::String;
-use bumpalo::collections::vec::Vec;
 use bumpalo::Bump;
 use parse::ast::{Attempting, Expr};
-use parse::ident;
-use parse::parser::{unexpected, unexpected_eof, Fail, Parser, State};
-use parse::problems::{Problem, Problems};
-use region::{Loc, Region};
+use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
 use std::char;
-use std::iter::Peekable;

 pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
    move |arena: &'a Bump, state: State<'a>| {
-        let mut problems = std::vec::Vec::new();
-        let mut chars = state.input.chars().peekable();
+        let mut chars = state.input.chars();

        // String literals must start with a quote.
        // If this doesn't, it must not be a string literal!
@ -26,464 +19,75 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
            }
        }

-        // If we have precisely an empty string here, don't bother allocating
-        // a buffer; instead, return EmptyStr immediately.
-        if chars.peek() == Some(&'"') {
-            return Ok((
-                Expr::EmptyStr,
-                // 2 because `""` has length 2
-                state.advance_without_indenting(2)?,
-            ));
-        }
-
-        // Stores the accumulated string characters
-        let mut buf = String::new_in(arena);
-
-        // This caches the total string length of interpolated_pairs. Every
-        // time we add a new pair to interpolated_pairs, we increment this
-        // by the sum of whatever we parsed in order to obtain that pair.
-        let mut buf_col_offset: usize = 0;
-
-        // Stores interpolated identifiers, if any.
-        let mut interpolated_pairs = Vec::new_in(arena);
+        // At the parsing stage we keep the entire raw string, because the formatter
+        // needs the raw string. (For example, so it can "remember" whether you
+        // wrote \u{...} or the actual unicode character itself.)
+        //
+        // Later, in canonicalization, we'll do things like resolving
+        // unicode escapes and string interpolation.
+        //
+        // Since we're keeping the entire raw string, all we need to track is
+        // how many characters we've parsed. So far, that's 1 (the opening `"`).
+        let mut parsed_chars = 1;
+        let mut prev_ch = '"';

        while let Some(ch) = chars.next() {
-            match ch {
-                // If it's a backslash, escape things.
-                '\\' => match chars.next() {
-                    Some(next_ch) => {
-                        if let Some(ident) = handle_escaped_char(
-                            arena,
-                            &state,
-                            next_ch,
-                            &mut chars,
-                            &mut buf,
-                            &mut problems,
-                        )? {
-                            let expr = Expr::Var(ident);
+            parsed_chars += 1;

-                            // +2 for `\(` and then another +1 for `)` at the end
-                            let parsed_length = buf.len() + 2 + ident.len() + 1;
-
-                            // It's okay if casting fails in this section, because
-                            // we're going to check for line length overflow at the
-                            // end anyway. That will render this region useless,
-                            // but the user wasn't going to see this region
-                            // anyway if the line length overflowed.
-                            let start_line = state.line;
-
-                            // Subtract ident length and another 1 for the `)`
-                            let start_col = state.column
-                                + buf_col_offset as u16
-                                + (parsed_length - ident.len() - 1) as u16;
-                            let ident_region = Region {
-                                start_line,
-                                start_col,
-                                end_line: start_line,
-                                end_col: start_col + ident.len() as u16 - 1,
-                            };
-                            let loc_expr = Loc {
-                                region: ident_region,
-                                value: expr,
-                            };
-
-                            // Push the accumulated string into the pairs list,
-                            // along with the ident that came after it.
-                            interpolated_pairs.push((buf.into_bump_str(), loc_expr));
-
-                            // Reset the buffer so we start working on a new string.
-                            buf = String::new_in(arena);
-
-                            // Advance the cached offset of how many chars we've parsed,
-                            // so the next time we see an interpolated ident, we can
-                            // correctly calculate its region.
-                            buf_col_offset += parsed_length;
-                        }
-                    }
-                    None => {
-                        // We ran out of characters before finding a closed quote;
-                        // let the loop finish normally, so we end up returning
-                        // the error that the string was not terminated.
-                        //
-                        // (There's the separate problem of a trailing backslash,
-                        // but often that will get fixed in the course of
-                        // addressing the missing closed quote.)
-                        ()
-                    }
-                },
-                '"' => {
-                    // We found a closed quote; this is the end of the string!
-                    let len_with_quotes = buf.len() + 2;
-                    let expr = if problems.is_empty() {
-                        let final_str = buf.into_bump_str();
-
-                        if interpolated_pairs.is_empty() {
-                            Expr::Str(final_str)
-                        } else {
-                            let tuple_ref =
-                                arena.alloc((interpolated_pairs.into_bump_slice(), final_str));
-
-                            Expr::InterpolatedStr(tuple_ref)
-                        }
+            // Potentially end the string (unless this is an escaped `"`!)
+            if ch == '"' && prev_ch != '\\' {
+                let expr = if parsed_chars == 2 {
+                    if let Some('"') = chars.next() {
+                        // If the first three chars were all `"`, then this
+                        // literal begins with `"""` and is a block string.
+                        return parse_block_string(arena, state, &mut chars);
                    } else {
-                        Expr::MalformedStr(problems.into_boxed_slice())
-                    };
+                        Expr::EmptyStr
+                    }
+                } else {
+                    // Start at 1 so we omit the opening `"`.
+                    // Subtract 1 from parsed_chars so we omit the closing `"`.
+                    Expr::Str(&state.input[1..(parsed_chars - 1)])
+                };

-                    let next_state = state.advance_without_indenting(len_with_quotes)?;
+                let next_state = state.advance_without_indenting(parsed_chars)?;

-                    return Ok((expr, next_state));
-                }
-                '\t' => {
-                    // Report the problem and continue. Tabs are syntax errors,
-                    // but maybe the rest of the string is fine!
-                    problems.push(loc_char(Problem::Tab, &state, buf.len()));
-                }
-                '\r' => {
-                    // Carriage returns aren't allowed in string literals,
-                    // but maybe the rest of the string is fine!
-                    problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
-                }
-                '\n' => {
-                    // We hit a newline before a close quote.
-                    // We can't safely assume where the string was supposed
-                    // to end, so this is an unrecoverable error.
-                    return Err(unexpected('\n', 0, state, Attempting::StringLiteral));
-                }
-                normal_char => buf.push(normal_char),
+                return Ok((expr, next_state));
+            } else if ch == '\n' {
+                // This is a single-line string, which cannot have newlines!
+                // Treat this as an unclosed string literal, and consume
+                // all remaining chars. This will mask all other errors, but
+                // it should make it easiest to debug; the file will be a giant
+                // error starting from where the open quote appeared.
+                return Err(unexpected(
+                    '\n',
+                    state.input.len() - 1,
+                    state,
+                    Attempting::StringLiteral,
+                ));
+            } else {
+                prev_ch = ch;
            }
        }

        // We ran out of characters before finding a closed quote
        Err(unexpected_eof(
-            buf.len(),
+            parsed_chars,
            Attempting::StringLiteral,
            state.clone(),
        ))
    }
 }

-fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
-    let start_line = state.line;
-    let start_col = state.column + buf_len as u16;
-    let end_line = start_line;
-    // All invalid chars should have a length of 1
-    let end_col = state.column + 1;
-
-    let region = Region {
-        start_line,
-        start_col,
-        end_line,
-        end_col,
-    };
-
-    Loc { region, value }
-}
-
-fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
-    let start_line = state.line;
-    let start_col = state.column + buf_len as u16;
-    let end_line = start_line;
-    // escapes should all be 2 chars long
-    let end_col = state.column + 1;
-
-    let region = Region {
-        start_line,
-        start_col,
-        end_line,
-        end_col,
-    };
-
-    Loc { region, value }
-}
-
-fn loc_escaped_unicode<'a, V>(
-    value: V,
-    state: &State<'a>,
-    buf_len: usize,
-    hex_str_len: usize,
-) -> Loc<V> {
-    let start_line = state.line;
-    // +1 due to the `"` which precedes buf.
-    let start_col = state.column + buf_len as u16 + 1;
-    let end_line = start_line;
-    // +3 due to the `\u{` and another + 1 due to the `}`
-    // -1 to prevent overshooting because end col is inclusive.
-    let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1;
-
-    let region = Region {
-        start_line,
-        start_col,
-        end_line,
-        end_col,
-    };
-
-    Loc { region, value }
-}
-
-#[inline(always)]
-fn handle_escaped_char<'a, I>(
-    arena: &'a Bump,
-    state: &State<'a>,
-    ch: char,
-    chars: &mut Peekable<I>,
-    buf: &mut String<'a>,
-    problems: &mut Problems,
-) -> Result<Option<&'a str>, (Fail, State<'a>)>
+fn parse_block_string<'a, I>(
+    _arena: &'a Bump,
+    _state: State<'a>,
+    _chars: &mut I,
+) -> ParseResult<'a, Expr<'a>>
 where
    I: Iterator<Item = char>,
 {
-    match ch {
-        '\\' => buf.push('\\'),
-        '"' => buf.push('"'),
-        't' => buf.push('\t'),
-        'n' => buf.push('\n'),
-        'r' => buf.push('\r'),
-        '0' => buf.push('\0'), // We explicitly support null characters, as we
-        // can't be sure we won't receive them from Rust.
-        'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?,
-        '(' => {
-            let ident = parse_interpolated_ident(arena, state, chars)?;
-
-            return Ok(Some(ident));
-        }
-        '\t' => {
-            // Report and continue.
-            // Tabs are syntax errors, but maybe the rest of the string is fine!
-            problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
-        }
-        '\r' => {
-            // Report and continue.
-            // Carriage returns aren't allowed in string literals,
-            // but maybe the rest of the string is fine!
-            problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
-        }
-        '\n' => {
-            // Report and bail out.
-            // We can't safely assume where the string was supposed to end.
-            problems.push(loc_escaped_char(
-                Problem::NewlineInLiteral,
-                &state,
-                buf.len(),
-            ));
-
-            return Err(unexpected_eof(
-                buf.len(),
-                Attempting::UnicodeEscape,
-                state.clone(),
-            ));
-        }
-        _ => {
-            // Report and continue.
-            // An unsupported escaped char (e.g. \q) shouldn't halt parsing.
-            problems.push(loc_escaped_char(
-                Problem::UnsupportedEscapedChar,
-                &state,
-                buf.len(),
-            ));
-        }
-    }
-
-    Ok(None)
-}
-
-#[inline(always)]
-fn handle_escaped_unicode<'a, I>(
-    arena: &'a Bump,
-    state: &State<'a>,
-    chars: &mut Peekable<I>,
-    buf: &mut String<'a>,
-    problems: &mut Problems,
-) -> Result<(), (Fail, State<'a>)>
-where
-    I: Iterator<Item = char>,
-{
-    // \u{00A0} is how you specify a Unicode code point,
-    // so we should always see a '{' next.
-    if chars.next() != Some('{') {
-        let start_line = state.line;
-        // +1 due to the `"` which precedes buf
-        let start_col = state.column + 1 + buf.len() as u16;
-        let end_line = start_line;
-
-        // All we parsed was `\u`, so end on the column after `\`'s column.
-        let end_col = start_col + 1;
-
-        let region = Region {
-            start_line,
-            start_col,
-            end_line,
-            end_col,
-        };
-
-        problems.push(Loc {
-            region,
-            value: Problem::NoUnicodeDigits,
-        });
-
-        // The rest of the string literal might be fine. Keep parsing!
-        return Ok(());
-    }
-
-    // Record the point in the string literal where we started parsing `\u`
-    let start_of_unicode = buf.len();
-
-    // Stores the accumulated unicode digits
-    let mut hex_str = String::new_in(arena);
-
-    while let Some(hex_char) = chars.next() {
-        match hex_char {
-            '}' => {
-                // Done! Validate and add it to the buffer.
-                match u32::from_str_radix(&hex_str, 16) {
-                    Ok(code_pt) => {
-                        if code_pt > 0x10FFFF {
-                            let start_line = state.line;
-                            // +1 due to the `"` which precedes buf
-                            // +3 due to the `\u{` which precedes the hex digits
-                            let start_col = state.column + 1 + buf.len() as u16 + 3;
-                            let end_line = start_line;
-
-                            // We want to underline only the number. That's the error!
-                            // -1 because we want to end on the last digit, not
-                            // overshoot it.
-                            let end_col = start_col + hex_str.len() as u16 - 1;
-
-                            let region = Region {
-                                start_line,
-                                start_col,
-                                end_line,
-                                end_col,
-                            };
-
-                            problems.push(Loc {
-                                region,
-                                value: Problem::UnicodeCodePointTooLarge,
-                            });
-                        } else {
-                            // If it all checked out, add it to
-                            // the main buffer.
-                            match char::from_u32(code_pt) {
-                                Some(ch) => buf.push(ch),
-                                None => {
-                                    problems.push(loc_escaped_unicode(
-                                        Problem::InvalidUnicodeCodePoint,
-                                        &state,
-                                        start_of_unicode,
-                                        hex_str.len(),
-                                    ));
-                                }
-                            }
-                        }
-                    }
-                    Err(_) => {
-                        let problem = if hex_str.is_empty() {
-                            Problem::NoUnicodeDigits
-                        } else {
-                            Problem::NonHexCharsInUnicodeCodePoint
-                        };
-
-                        problems.push(loc_escaped_unicode(
-                            problem,
-                            &state,
-                            start_of_unicode,
-                            hex_str.len(),
-                        ));
-                    }
-                }
-
-                // We are now done processing the unicode portion of the string,
-                // so exit the loop without further advancing the iterator.
-                return Ok(());
-            }
-            '\t' => {
-                // Report and continue.
-                // Tabs are syntax errors, but maybe the rest of the string is fine!
-                problems.push(loc_escaped_unicode(
-                    Problem::Tab,
-                    &state,
-                    start_of_unicode,
-                    hex_str.len(),
-                ));
-            }
-            '\r' => {
-                // Report and continue.
-                // Carriage returns aren't allowed in string literals,
-                // but maybe the rest of the string is fine!
-                problems.push(loc_escaped_unicode(
-                    Problem::CarriageReturn,
-                    &state,
-                    start_of_unicode,
-                    hex_str.len(),
-                ));
-            }
-            '\n' => {
-                // Report and bail out.
-                // We can't safely assume where the string was supposed to end.
-                problems.push(loc_escaped_unicode(
-                    Problem::NewlineInLiteral,
-                    &state,
-                    start_of_unicode,
-                    hex_str.len(),
-                ));
-
-                return Err(unexpected_eof(
-                    buf.len(),
-                    Attempting::UnicodeEscape,
-                    state.clone(),
-                ));
-            }
-            normal_char => hex_str.push(normal_char),
-        }
-
-        // If we're about to hit the end of the string, and we didn't already
-        // complete parsing a valid unicode escape sequence, this is a malformed
-        // escape sequence - it wasn't terminated!
-        if chars.peek() == Some(&'"') {
-            // Record a problem and exit the loop early, so the string literal
-            // parsing logic can consume the quote and do its job as normal.
-            let start_line = state.line;
-            // +1 due to the `"` which precedes buf.
-            let start_col = state.column + buf.len() as u16 + 1;
-            let end_line = start_line;
-            // +3 due to the `\u{`
-            // -1 to prevent overshooting because end col is inclusive.
-            let end_col = start_col + 3 + hex_str.len() as u16 - 1;
-
-            let region = Region {
-                start_line,
-                start_col,
-                end_line,
-                end_col,
-            };
-
-            problems.push(Loc {
-                region,
-                value: Problem::MalformedEscapedUnicode,
-            });
-
-            return Ok(());
-        }
-    }
-
-    Ok(())
-}
-
-#[inline(always)]
-fn parse_interpolated_ident<'a, I>(
-    arena: &'a Bump,
-    state: &State<'a>,
-    chars: &mut Peekable<I>,
-) -> Result<&'a str, (Fail, State<'a>)>
-where
-    I: Iterator<Item = char>,
-{
-    // This will return Err on invalid identifiers like "if"
-    let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?;
-
-    // Make sure we got a closing ) to end the interpolation.
-    match next_char {
-        Some(')') => Ok(string),
-        Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)),
-        None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)),
-    }
+    // So far we have consumed the `"""` and that's it.
+    let _parsed_chars = 3;
+    panic!("TODO parse block string, advance state, etc");
 }