Merge pull request #6447 from roc-lang/fix-dollar

Fix parsing strings with non-interpolated "$"
This commit is contained in:
Folkert de Vries 2024-01-28 00:38:37 +01:00 committed by GitHub
commit 01761abede
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 89 additions and 62 deletions

View file

@ -173,6 +173,8 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
};
}
let mut preceded_by_dollar = false;
while let Some(&byte) = bytes.next() {
// This is for the byte we just grabbed from the iterator.
segment_parsed_bytes += 1;
@ -349,68 +351,6 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
return Err((MadeProgress, EString::EndlessSingleLine(start_state.pos())));
}
}
b'$' if !is_single_quote => {
// This is for the byte we're about to parse.
segment_parsed_bytes += 1;
// iff the '$' is followed by '(', this is string interpolation.
if let Some(b'(') = bytes.next() {
// We're about to begin string interpolation!
//
// End the previous segment so we can begin a new one.
// Retroactively end it right before the `$` char we parsed.
// (We can't use end_segment! here because it ends it right after
// the just-parsed character, which here would be '(' rather than '$')
// Don't push anything if the string would be empty.
if segment_parsed_bytes > 2 {
// exclude the 2 chars we just parsed, namely '$' and '('
let string_bytes = &state.bytes()[0..(segment_parsed_bytes - 2)];
match std::str::from_utf8(string_bytes) {
Ok(string) => {
state.advance_mut(string.len());
segments.push(StrSegment::Plaintext(string));
}
Err(_) => {
return Err((
MadeProgress,
EString::Space(BadInputError::BadUtf8, state.pos()),
));
}
}
}
// Advance past the `$(`
state.advance_mut(2);
let original_byte_count = state.bytes().len();
// Parse an arbitrary expression, followed by ')'
let (_progress, loc_expr, new_state) = skip_second!(
specialize_ref(
EString::Format,
loc(allocated(reset_min_indent(expr::expr_help())))
),
word1(b')', EString::FormatEnd)
)
.parse(arena, state, min_indent)?;
// Advance the iterator past the expr we just parsed.
for _ in 0..(original_byte_count - new_state.bytes().len()) {
bytes.next();
}
segments.push(StrSegment::Interpolated(loc_expr));
// Reset the segment
segment_parsed_bytes = 0;
state = new_state;
}
// If the '$' wasn't followed by '(', then this wasn't interpolation,
// and we don't need to do anything special.
}
b'\\' => {
// We're about to begin an escaped segment of some sort!
//
@ -510,10 +450,67 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
}
}
}
b'(' if preceded_by_dollar && !is_single_quote => {
// We're about to begin string interpolation!
//
// End the previous segment so we can begin a new one.
// Retroactively end it right before the `$` char we parsed.
// (We can't use end_segment! here because it ends it right after
// the just-parsed character, which here would be '(' rather than '$')
// Don't push anything if the string would be empty.
if segment_parsed_bytes > 2 {
// exclude the 2 chars we just parsed, namely '$' and '('
let string_bytes = &state.bytes()[0..(segment_parsed_bytes - 2)];
match std::str::from_utf8(string_bytes) {
Ok(string) => {
state.advance_mut(string.len());
segments.push(StrSegment::Plaintext(string));
}
Err(_) => {
return Err((
MadeProgress,
EString::Space(BadInputError::BadUtf8, state.pos()),
));
}
}
}
// Advance past the `$(`
state.advance_mut(2);
let original_byte_count = state.bytes().len();
// Parse an arbitrary expression, followed by ')'
let (_progress, loc_expr, new_state) = skip_second!(
specialize_ref(
EString::Format,
loc(allocated(reset_min_indent(expr::expr_help())))
),
word1(b')', EString::FormatEnd)
)
.parse(arena, state, min_indent)?;
// Advance the iterator past the expr we just parsed.
for _ in 0..(original_byte_count - new_state.bytes().len()) {
bytes.next();
}
segments.push(StrSegment::Interpolated(loc_expr));
// Reset the segment
segment_parsed_bytes = 0;
state = new_state;
}
_ => {
// All other characters need no special handling.
}
}
// iff the '$' is followed by '(', this is string interpolation.
// We'll check for the '(' on the next iteration of the loop.
preceded_by_dollar = byte == b'$';
}
// We ran out of characters before finding a closed quote

View file

@ -200,6 +200,36 @@ mod test_parse {
});
}
#[test]
fn string_of_just_dollar_sign() {
let arena = Bump::new();
assert_eq!(
Ok(Expr::Str(PlainLine("$"))),
parse_expr_with(&arena, arena.alloc(r#""$""#))
);
}
#[test]
fn string_beginning_with_dollar() {
let arena = Bump::new();
assert_eq!(
Ok(Expr::Str(PlainLine("$foo"))),
parse_expr_with(&arena, arena.alloc(r#""$foo""#))
);
}
#[test]
fn string_ending_with_dollar() {
let arena = Bump::new();
assert_eq!(
Ok(Expr::Str(PlainLine("foo$"))),
parse_expr_with(&arena, arena.alloc(r#""foo$""#))
);
}
#[test]
fn string_with_interpolation_in_back() {
assert_segments(r#""Hello $(name)""#, |arena| {