mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-29 06:44:46 +00:00
attempt at multiline string
This commit is contained in:
parent
98a3d42eff
commit
41720ec5db
2 changed files with 98 additions and 144 deletions
|
@ -1557,6 +1557,26 @@ mod test_can {
|
||||||
assert_can(r#""x\u(101010)x""#, expr_str("x\u{101010}x"));
|
assert_can(r#""x\u(101010)x""#, expr_str("x\u{101010}x"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn block_string() {
|
||||||
|
assert_can(
|
||||||
|
r#"
|
||||||
|
"""foobar"""
|
||||||
|
"#,
|
||||||
|
expr_str("foobar"),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_can(
|
||||||
|
indoc!(
|
||||||
|
r#"
|
||||||
|
"""foo
|
||||||
|
bar"""
|
||||||
|
"#
|
||||||
|
),
|
||||||
|
expr_str("foo\nbar"),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_too_large_unicode_escape() {
|
// fn string_with_too_large_unicode_escape() {
|
||||||
// // Should be too big - max size should be 10FFFF.
|
// // Should be too big - max size should be 10FFFF.
|
||||||
|
|
|
@ -41,6 +41,14 @@ fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
macro_rules! advance_state {
|
||||||
|
($state:expr, $n:expr) => {
|
||||||
|
$state.advance_without_indenting_ee($n, |r, c| {
|
||||||
|
EString::Space(BadInputError::LineTooLong, r, c)
|
||||||
|
})
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||||
use StrLiteral::*;
|
use StrLiteral::*;
|
||||||
|
|
||||||
|
@ -51,23 +59,17 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||||
if state.bytes.starts_with(b"\"\"\"") {
|
if state.bytes.starts_with(b"\"\"\"") {
|
||||||
// we will be parsing a multi-line (block) string
|
// we will be parsing a multi-line (block) string
|
||||||
is_multiline = true;
|
is_multiline = true;
|
||||||
bytes = state.bytes[3..].iter()
|
bytes = state.bytes[3..].iter();
|
||||||
|
state = advance_state!(state, 3)?;
|
||||||
} else if state.bytes.starts_with(b"\"") {
|
} else if state.bytes.starts_with(b"\"") {
|
||||||
// we will be parsing a single-line string
|
// we will be parsing a single-line string
|
||||||
is_multiline = true;
|
is_multiline = false;
|
||||||
bytes = state.bytes[1..].iter()
|
bytes = state.bytes[1..].iter();
|
||||||
|
state = advance_state!(state, 1)?;
|
||||||
} else {
|
} else {
|
||||||
return Err((NoProgress, EString::Open(state.line, state.column), state));
|
return Err((NoProgress, EString::Open(state.line, state.column), state));
|
||||||
}
|
}
|
||||||
|
|
||||||
// String literals must start with a quote.
|
|
||||||
// If this doesn't, it must not be a string literal!
|
|
||||||
|
|
||||||
// Advance past the opening quotation mark.
|
|
||||||
state = state.advance_without_indenting_ee(1, |r, c| {
|
|
||||||
EString::Space(BadInputError::LineTooLong, r, c)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// At the parsing stage we keep the entire raw string, because the formatter
|
// At the parsing stage we keep the entire raw string, because the formatter
|
||||||
// needs the raw string. (For example, so it can "remember" whether you
|
// needs the raw string. (For example, so it can "remember" whether you
|
||||||
// wrote \u{...} or the actual unicode character itself.)
|
// wrote \u{...} or the actual unicode character itself.)
|
||||||
|
@ -77,14 +79,6 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||||
let mut segment_parsed_bytes = 0;
|
let mut segment_parsed_bytes = 0;
|
||||||
let mut segments = Vec::new_in(arena);
|
let mut segments = Vec::new_in(arena);
|
||||||
|
|
||||||
macro_rules! advance_state {
|
|
||||||
($state:expr, $n:expr) => {
|
|
||||||
$state.advance_without_indenting_ee($n, |r, c| {
|
|
||||||
EString::Space(BadInputError::LineTooLong, r, c)
|
|
||||||
})
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! escaped_char {
|
macro_rules! escaped_char {
|
||||||
($ch:expr) => {
|
($ch:expr) => {
|
||||||
// Record the escaped char.
|
// Record the escaped char.
|
||||||
|
@ -114,7 +108,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||||
|
|
||||||
segments.push($transform(string));
|
segments.push($transform(string));
|
||||||
}
|
}
|
||||||
Err(reason) => {
|
Err(_) => {
|
||||||
return Err((
|
return Err((
|
||||||
MadeProgress,
|
MadeProgress,
|
||||||
EString::Space(BadInputError::BadUtf8, state.line, state.column),
|
EString::Space(BadInputError::BadUtf8, state.line, state.column),
|
||||||
|
@ -142,58 +136,77 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||||
|
|
||||||
match byte {
|
match byte {
|
||||||
b'"' => {
|
b'"' => {
|
||||||
// This is the end of the string!
|
|
||||||
if segment_parsed_bytes == 1 && segments.is_empty() {
|
if segment_parsed_bytes == 1 && segments.is_empty() {
|
||||||
match bytes.next() {
|
// special case of the empty string
|
||||||
Some(b'"') => {
|
if is_multiline {
|
||||||
// If the very first three chars were all `"`,
|
if bytes.as_slice().starts_with(b"\"\"") {
|
||||||
// then this literal begins with `"""`
|
return Ok((MadeProgress, Block(&[]), advance_state!(state, 3)?));
|
||||||
// and is a block string.
|
} else {
|
||||||
// return parse_block_string(arena, state, &mut bytes);
|
// this quote is in a block string
|
||||||
todo!()
|
continue;
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
// Advance 1 for the close quote
|
|
||||||
return Ok((
|
|
||||||
MadeProgress,
|
|
||||||
PlainLine(""),
|
|
||||||
advance_state!(state, 1)?,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
end_segment!(StrSegment::Plaintext);
|
|
||||||
|
|
||||||
let expr = if segments.len() == 1 {
|
|
||||||
// We had exactly one segment, so this is a candidate
|
|
||||||
// to be StrLiteral::PlainLine
|
|
||||||
match segments.pop().unwrap() {
|
|
||||||
StrSegment::Plaintext(string) => StrLiteral::PlainLine(string),
|
|
||||||
other => {
|
|
||||||
let vec = bumpalo::vec![in arena; other];
|
|
||||||
|
|
||||||
StrLiteral::Line(vec.into_bump_slice())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Line(segments.into_bump_slice())
|
// This is the end of the string!
|
||||||
};
|
// Advance 1 for the close quote
|
||||||
|
return Ok((MadeProgress, PlainLine(""), advance_state!(state, 1)?));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if is_multiline {
|
||||||
|
if bytes.as_slice().starts_with(b"\"\"") {
|
||||||
|
end_segment!(StrSegment::Plaintext);
|
||||||
|
|
||||||
// Advance the state 1 to account for the closing `"`
|
let expr = if segments.len() == 1 {
|
||||||
return Ok((MadeProgress, expr, advance_state!(state, 1)?));
|
// We had exactly one segment, so this is a candidate
|
||||||
|
// to be StrLiteral::PlainLine
|
||||||
|
match segments.pop().unwrap() {
|
||||||
|
StrSegment::Plaintext(string) => {
|
||||||
|
StrLiteral::PlainLine(string)
|
||||||
|
}
|
||||||
|
other => StrLiteral::Line(arena.alloc([other])),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Block(arena.alloc([segments.into_bump_slice()]))
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok((MadeProgress, expr, advance_state!(state, 3)?));
|
||||||
|
} else {
|
||||||
|
// this quote is in a block string
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
end_segment!(StrSegment::Plaintext);
|
||||||
|
|
||||||
|
let expr = if segments.len() == 1 {
|
||||||
|
// We had exactly one segment, so this is a candidate
|
||||||
|
// to be StrLiteral::PlainLine
|
||||||
|
match segments.pop().unwrap() {
|
||||||
|
StrSegment::Plaintext(string) => StrLiteral::PlainLine(string),
|
||||||
|
other => StrLiteral::Line(arena.alloc([other])),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Line(segments.into_bump_slice())
|
||||||
|
};
|
||||||
|
|
||||||
|
// Advance the state 1 to account for the closing `"`
|
||||||
|
return Ok((MadeProgress, expr, advance_state!(state, 1)?));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
b'\n' => {
|
b'\n' => {
|
||||||
// This is a single-line string, which cannot have newlines!
|
if is_multiline {
|
||||||
// Treat this as an unclosed string literal, and consume
|
continue;
|
||||||
// all remaining chars. This will mask all other errors, but
|
} else {
|
||||||
// it should make it easiest to debug; the file will be a giant
|
// This is a single-line string, which cannot have newlines!
|
||||||
// error starting from where the open quote appeared.
|
// Treat this as an unclosed string literal, and consume
|
||||||
return Err((
|
// all remaining chars. This will mask all other errors, but
|
||||||
MadeProgress,
|
// it should make it easiest to debug; the file will be a giant
|
||||||
EString::EndlessSingle(state.line, state.column),
|
// error starting from where the open quote appeared.
|
||||||
state,
|
return Err((
|
||||||
));
|
MadeProgress,
|
||||||
|
EString::EndlessSingle(state.line, state.column),
|
||||||
|
state,
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
b'\\' => {
|
b'\\' => {
|
||||||
// We're about to begin an escaped segment of some sort!
|
// We're about to begin an escaped segment of some sort!
|
||||||
|
@ -303,82 +316,3 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_block_string<'a, I>(
|
|
||||||
arena: &'a Bump,
|
|
||||||
state: State<'a>,
|
|
||||||
bytes: &mut I,
|
|
||||||
) -> ParseResult<'a, StrLiteral<'a>, SyntaxError<'a>>
|
|
||||||
where
|
|
||||||
I: Iterator<Item = &'a u8>,
|
|
||||||
{
|
|
||||||
// So far we have consumed the `"""` and that's it.
|
|
||||||
let mut parsed_chars = 3;
|
|
||||||
let mut prev_byte = b'"';
|
|
||||||
let mut quotes_seen = 0;
|
|
||||||
|
|
||||||
// start at 3 to omit the opening `"`.
|
|
||||||
let mut line_start = 3;
|
|
||||||
|
|
||||||
let mut lines: Vec<'a, &'a str> = Vec::new_in(arena);
|
|
||||||
|
|
||||||
for byte in bytes {
|
|
||||||
parsed_chars += 1;
|
|
||||||
|
|
||||||
// Potentially end the string (unless this is an escaped `"`!)
|
|
||||||
match byte {
|
|
||||||
b'"' if prev_byte != b'\\' => {
|
|
||||||
if quotes_seen == 2 {
|
|
||||||
// three consecutive quotes, end string
|
|
||||||
|
|
||||||
// Subtract 3 from parsed_chars so we omit the closing `"`.
|
|
||||||
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
|
|
||||||
|
|
||||||
return match parse_utf8(line_bytes) {
|
|
||||||
Ok(line) => {
|
|
||||||
// state = state.advance_without_indenting(parsed_chars)?;
|
|
||||||
|
|
||||||
// lines.push(line);
|
|
||||||
|
|
||||||
// Ok((StrLiteral::Block(lines.into_bump_slice()), state))
|
|
||||||
Err((
|
|
||||||
MadeProgress,
|
|
||||||
SyntaxError::NotYetImplemented(format!(
|
|
||||||
"TODO parse this line in a block string: {:?}",
|
|
||||||
line
|
|
||||||
)),
|
|
||||||
state,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
Err(reason) => state.fail(arena, MadeProgress, reason),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
quotes_seen += 1;
|
|
||||||
}
|
|
||||||
b'\n' => {
|
|
||||||
// note this includes the newline
|
|
||||||
let line_bytes = &state.bytes[line_start..parsed_chars];
|
|
||||||
|
|
||||||
match parse_utf8(line_bytes) {
|
|
||||||
Ok(line) => {
|
|
||||||
lines.push(line);
|
|
||||||
|
|
||||||
quotes_seen = 0;
|
|
||||||
line_start = parsed_chars;
|
|
||||||
}
|
|
||||||
Err(reason) => {
|
|
||||||
return state.fail(arena, MadeProgress, reason);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
quotes_seen = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
prev_byte = *byte;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We ran out of characters before finding 3 closing quotes
|
|
||||||
Err(unexpected_eof(arena, state, parsed_chars))
|
|
||||||
}
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue