diff --git a/compiler/parse/src/ast.rs b/compiler/parse/src/ast.rs index cf4854a575..fcd1792b3e 100644 --- a/compiler/parse/src/ast.rs +++ b/compiler/parse/src/ast.rs @@ -86,21 +86,17 @@ pub struct WhenPattern<'a> { #[derive(Clone, Debug, PartialEq)] pub enum StrSegment<'a> { - Plaintext(&'a str), // e.g. "foo" - Unicode(Loc<&'a str>), // e.g. "00A0" in "\u(00A0)" - EscapedChar(char), // e.g. '\n' in "Hello!\n" - Interpolated { - // e.g. "App.version" in "Version: \(App.version)" - module_name: &'a str, - ident: &'a str, - region: Region, - }, + Plaintext(&'a str), // e.g. "foo" + Unicode(Loc<&'a str>), // e.g. "00A0" in "\u(00A0)" + EscapedChar(char), // e.g. '\n' in "Hello!\n" + Interpolated(Loc<&'a Expr<'a>>), // e.g. (name) in "Hi, \(name)!" } #[derive(Clone, Debug, PartialEq)] pub enum StrLiteral<'a> { + /// The most common case: a plain string with no escapes or interpolations PlainLine(&'a str), - LineWithEscapes(&'a [StrSegment<'a>]), + Line(&'a [StrSegment<'a>]), Block(&'a [&'a [StrSegment<'a>]]), } diff --git a/compiler/parse/src/string_literal.rs b/compiler/parse/src/string_literal.rs index bda1ca7fd9..d34f43b137 100644 --- a/compiler/parse/src/string_literal.rs +++ b/compiler/parse/src/string_literal.rs @@ -1,5 +1,8 @@ use crate::ast::{Attempting, StrLiteral, StrSegment}; -use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State}; +use crate::expr; +use crate::parser::{ + allocated, ascii_char, loc, parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State, +}; use bumpalo::collections::vec::Vec; use bumpalo::Bump; use roc_region::all::{Located, Region}; @@ -49,10 +52,6 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> { macro_rules! end_segment { ($transform:expr) => { - dbg!("ending segment"); - dbg!(segment_parsed_bytes - 1); - dbg!(&state.bytes); - // Don't push anything if the string would be empty. 
if segment_parsed_bytes > 1 { // This function is always called after we just parsed @@ -66,15 +65,11 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> { state = state.advance_without_indenting(string.len())?; segments.push($transform(string)); - - dbg!(&segments); } Err(reason) => { return state.fail(reason); } } - } else { - // If we parsed 0 bytes, } // Depending on where this macro is used, in some @@ -90,7 +85,6 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> { } while let Some(&byte) = bytes.next() { - dbg!("Parsing {:?}", (byte as char).to_string()); // This is for the byte we just grabbed from the iterator. segment_parsed_bytes += 1; @@ -120,11 +114,11 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> { other => { let vec = bumpalo::vec![in arena; other]; - StrLiteral::LineWithEscapes(vec.into_bump_slice()) + StrLiteral::Line(vec.into_bump_slice()) } } } else { - LineWithEscapes(segments.into_bump_slice()) + Line(segments.into_bump_slice()) }; // Advance the state 1 to account for the closing `"` @@ -157,8 +151,29 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> { // to figure out what type of escape it is. match bytes.next() { Some(b'(') => { - // This is an interpolated variable - todo!("Make a new parser state, then use it to parse ident followed by ')'"); + // Advance past the `\(` before using the expr parser + state = state.advance_without_indenting(2)?; + + let original_byte_count = state.bytes.len(); + + // This is an interpolated variable. + // Parse an arbitrary expression, then give a + // canonicalization error if that expression variant + // is not allowed inside a string interpolation. + let (loc_expr, new_state) = + skip_second!(loc(allocated(expr::expr(0))), ascii_char(')')) + .parse(arena, state)?; + + // Advance the iterator past the expr we just parsed. 
+ for _ in 0..(original_byte_count - new_state.bytes.len()) { + bytes.next(); + } + + segments.push(StrSegment::Interpolated(loc_expr)); + + // Reset the segment + segment_parsed_bytes = 0; + state = new_state; } Some(b'u') => { // This is an escaped unicode character diff --git a/compiler/parse/tests/test_parse.rs b/compiler/parse/tests/test_parse.rs index 35f00f4fda..7754275d83 100644 --- a/compiler/parse/tests/test_parse.rs +++ b/compiler/parse/tests/test_parse.rs @@ -51,8 +51,8 @@ mod test_parse { } fn parses_with_escaped_char< - I: Fn(&'static str) -> String, - E: Fn(char, &Bump) -> Vec<'_, ast::StrSegment<'static>>, + I: Fn(&str) -> String, + E: Fn(char, &Bump) -> Vec<'_, ast::StrSegment<'_>>, >( to_input: I, to_expected: E, @@ -69,7 +69,7 @@ mod test_parse { ] { let actual = parse_with(&arena, arena.alloc(to_input(string))); let expected_slice = to_expected(*ch, &arena).into_bump_slice(); - let expected_expr = Expr::Str(LineWithEscapes(expected_slice)); + let expected_expr = Expr::Str(Line(expected_slice)); assert_eq!(Ok(expected_expr), actual); } @@ -86,7 +86,7 @@ mod test_parse { assert_parses_to( indoc!( r#" - "" + "" "# ), Str(PlainLine("")), @@ -128,6 +128,8 @@ mod test_parse { expect_parsed_str("123 abc 456 def", r#""123 abc 456 def""#); } + // BACKSLASH ESCAPES + #[test] fn string_with_escaped_char_at_end() { parses_with_escaped_char( @@ -160,10 +162,87 @@ mod test_parse { ); } + // INTERPOLATION + + fn assert_interpolations<E: Fn(&Bump) -> Vec<'_, ast::StrSegment<'_>>>( + input: &str, + to_expected: E, + ) { + let arena = Bump::new(); + let actual = parse_with(&arena, arena.alloc(input)); + let expected_slice = to_expected(&arena).into_bump_slice(); + let expected_expr = Expr::Str(Line(expected_slice)); + + assert_eq!(Ok(expected_expr), actual); + } + #[test] - fn string_with_single_quote() { - // This shoud NOT be escaped in a string. 
- expect_parsed_str("x'x", r#""x'x""#); + fn string_with_interpolation_in_middle() { + assert_interpolations(r#""Hi, \(name)!""#, |arena| { + let expr = arena.alloc(Var { + module_name: "", + ident: "name", + }); + + bumpalo::vec![in arena; + Plaintext("Hi, "), + Interpolated(Located::new(0, 0, 7, 11, expr)), + Plaintext("!") + ] + }); + } + + #[test] + fn string_with_interpolation_in_front() { + assert_interpolations(r#""\(name), hi!""#, |arena| { + let expr = arena.alloc(Var { + module_name: "", + ident: "name", + }); + + bumpalo::vec![in arena; + Interpolated(Located::new(0, 0, 3, 7, expr)), + Plaintext(", hi!") + ] + }); + } + + #[test] + fn string_with_interpolation_in_back() { + assert_interpolations(r#""Hello \(name)""#, |arena| { + let expr = arena.alloc(Var { + module_name: "", + ident: "name", + }); + + bumpalo::vec![in arena; + Plaintext("Hello "), + Interpolated(Located::new(0, 0, 9, 13, expr)) + ] + }); + } + + #[test] + fn string_with_multiple_interpolations() { + assert_interpolations(r#""Hi, \(name)! How is \(project) going?""#, |arena| { + let expr1 = arena.alloc(Var { + module_name: "", + ident: "name", + }); + + let expr2 = arena.alloc(Var { + module_name: "", + ident: "project", + }); + + bumpalo::vec![in arena; + Plaintext("Hi, "), + Interpolated(Located::new(0, 0, 7, 11, expr1)), + Plaintext("! How is "), + Interpolated(Located::new(0, 0, 23, 30, expr2)), + Plaintext(" going?") + ] + }); } #[test] @@ -510,7 +589,7 @@ mod test_parse { } #[test] - fn comment_with_unicode() { + fn comment_with_non_ascii() { let arena = Bump::new(); let spaced_int = arena .alloc(Num("3")) @@ -2490,8 +2569,6 @@ mod test_parse { // ); // } - // TODO test for \t \r and \n in string literals *outside* unicode escape sequence! - // // TODO test for non-ASCII variables // // TODO verify that when a string literal contains a newline before the