// Mirror of https://github.com/roc-lang/roc.git
// (synced 2025-07-23 22:45:14 +00:00; 445 lines, 15 KiB, Rust)
// use bumpalo::collections::string::String;
|
|
// use bumpalo::collections::vec::Vec;
|
|
use bumpalo::Bump;
|
|
use roc_parse::ast::Expr;
|
|
// use roc_parse::ast::{Attempting, Expr};
|
|
// use roc_parse::ident;
|
|
// use roc_parse::parser::{unexpected, unexpected_eof, Fail, Parser, State};
|
|
// use roc_parse::problems::{Problem, Problems};
|
|
// use roc_region::all::{Loc, Region};
|
|
use roc_region::all::Region;
|
|
// use std::char;
|
|
// use std::iter::Peekable;
|
|
|
|
/// Placeholder entry point for canonicalizing a string literal.
///
/// The real implementation is currently disabled (the previous version is
/// kept, commented out, further down in this file), so none of the arguments
/// are inspected and no `Expr` is ever produced.
///
/// # Panics
///
/// Always panics with the message `TODO restore canonicalization`.
pub fn canonical_string_literal<'a>(
    _arena: &Bump,
    _raw: &'a str,
    _region: Region,
) -> Expr<'a> {
    // Diverges unconditionally; `!` coerces to the declared return type.
    panic!("TODO restore canonicalization")
}
|
|
// let mut problems = std::vec::Vec::new();
|
|
|
|
// // Stores the accumulated string characters
|
|
// let mut buf = String::new_in(arena);
|
|
|
|
// // This caches the total string length of interpolated_pairs. Every
|
|
// // time we add a new pair to interpolated_pairs, we increment this
|
|
// // by the sum of whatever we parsed in order to obtain that pair.
|
|
// let mut buf_col_offset: usize = 0;
|
|
|
|
// // Stores interpolated identifiers, if any.
|
|
// let mut interpolated_pairs = Vec::new_in(arena);
|
|
|
|
// let mut chars = raw.chars();
|
|
|
|
// while let Some(ch) = chars.next() {
|
|
// match ch {
|
|
// // If it's a backslash, escape things.
|
|
// '\\' => match chars.next() {
|
|
// Some(next_ch) => {
|
|
// if let Some(ident) = handle_escaped_char(
|
|
// arena,
|
|
// &state,
|
|
// next_ch,
|
|
// &mut chars,
|
|
// &mut buf,
|
|
// &mut problems,
|
|
// )? {
|
|
// let expr = Expr::Var(ident);
|
|
|
|
// // +2 for `\(` and then another +1 for `)` at the end
|
|
// let parsed_length = buf.len() + 2 + ident.len() + 1;
|
|
|
|
// // Casting should always succeed in this section, because
|
|
// // if this string literal overflowed our maximum
|
|
// // line length, that would have already happened back
|
|
// // in the parsing step, and we never would have reached
|
|
// // this code. Still, debug_assert that they won't!
|
|
// debug_assert!(buf_col_offset <= u16::MAX as usize);
|
|
// debug_assert!(ident.len() <= u16::MAX as usize);
|
|
// debug_assert!((parsed_length - ident.len() - 1) <= u16::MAX as usize);
|
|
|
|
// let start_line = state.line;
|
|
|
|
// // Subtract ident length and another 1 for the `)`
|
|
// let start_col = state.column
|
|
// + buf_col_offset as u16
|
|
// + (parsed_length - ident.len() - 1) as u16;
|
|
// let ident_region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line: start_line,
|
|
// end_col: start_col + ident.len() as u16 - 1,
|
|
// };
|
|
// let loc_expr = Loc {
|
|
// region: ident_region,
|
|
// value: expr,
|
|
// };
|
|
|
|
// // Push the accumulated string into the pairs list,
|
|
// // along with the ident that came after it.
|
|
// interpolated_pairs.push((buf.into_bump_str(), loc_expr));
|
|
|
|
// // Reset the buffer so we start working on a new string.
|
|
// buf = String::new_in(arena);
|
|
|
|
// // Advance the cached offset of how many chars we've parsed,
|
|
// // so the next time we see an interpolated ident, we can
|
|
// // correctly calculate its region.
|
|
// buf_col_offset += parsed_length;
|
|
// }
|
|
// }
|
|
// None => {
|
|
// problems.push(loc_char(Problem::TrailingBackslash, &state, buf.len()));
|
|
// }
|
|
// },
|
|
// '\t' => {
|
|
// // Tabs are syntax errors.
|
|
// problems.push(loc_char(Problem::Tab, &state, buf.len()));
|
|
// }
|
|
// '\r' => {
|
|
// // Carriage returns aren't allowed in string literals.
|
|
// problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
|
|
// }
|
|
// normal_char => buf.push(normal_char),
|
|
// }
|
|
// }
|
|
|
|
// // We ran out of characters; this is the end of the string!
|
|
// if problems.is_empty() {
|
|
// let final_str = buf.into_bump_str();
|
|
|
|
// if interpolated_pairs.is_empty() {
|
|
// Expr::Str(final_str)
|
|
// } else {
|
|
// let tuple_ref = arena.alloc((interpolated_pairs.into_bump_slice(), final_str));
|
|
|
|
// Expr::InterpolatedStr(tuple_ref)
|
|
// }
|
|
// } else {
|
|
// Expr::MalformedStr(problems.into_boxed_slice())
|
|
// }
|
|
// }
|
|
|
|
// fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Located<V> {
|
|
// let start_line = state.line;
|
|
// let start_col = state.column + buf_len as u16;
|
|
// let end_line = start_line;
|
|
// // All invalid chars should have a length of 1
|
|
// let end_col = state.column + 1;
|
|
|
|
// let region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line,
|
|
// end_col,
|
|
// };
|
|
|
|
// Loc { region, value }
|
|
// }
|
|
|
|
// fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Located<V> {
|
|
// let start_line = state.line;
|
|
// let start_col = state.column + buf_len as u16;
|
|
// let end_line = start_line;
|
|
// // escapes should all be 2 chars long
|
|
// let end_col = state.column + 1;
|
|
|
|
// let region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line,
|
|
// end_col,
|
|
// };
|
|
|
|
// Loc { region, value }
|
|
// }
|
|
|
|
// fn loc_escaped_unicode<'a, V>(
|
|
// value: V,
|
|
// state: &State<'a>,
|
|
// buf_len: usize,
|
|
// hex_str_len: usize,
|
|
// ) -> Located<V> {
|
|
// let start_line = state.line;
|
|
// // +1 due to the `"` which precedes buf.
|
|
// let start_col = state.column + buf_len as u16 + 1;
|
|
// let end_line = start_line;
|
|
// // +3 due to the `\u{` and another + 1 due to the `}`
|
|
// // -1 to prevent overshooting because end col is inclusive.
|
|
// let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1;
|
|
|
|
// let region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line,
|
|
// end_col,
|
|
// };
|
|
|
|
// Loc { region, value }
|
|
// }
|
|
|
|
// #[inline(always)]
|
|
// fn handle_escaped_char<'a, I>(
|
|
// arena: &'a Bump,
|
|
// state: &State<'a>,
|
|
// ch: char,
|
|
// chars: &mut Peekable<I>,
|
|
// buf: &mut String<'a>,
|
|
// problems: &mut Problems,
|
|
// ) -> Result<Option<&'a str>, (Fail, State<'a>)>
|
|
// where
|
|
// I: Iterator<Item = char>,
|
|
// {
|
|
// match ch {
|
|
// '\\' => buf.push('\\'),
|
|
// '"' => buf.push('"'),
|
|
// 't' => buf.push('\t'),
|
|
// 'n' => buf.push('\n'),
|
|
// 'r' => buf.push('\r'),
|
|
// '0' => buf.push('\0'), // We explicitly support null characters, as we
|
|
// // can't be sure we won't receive them from Rust.
|
|
// 'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?,
|
|
// '(' => {
|
|
// let ident = parse_interpolated_ident(arena, state, chars)?;
|
|
|
|
// return Ok(Some(ident));
|
|
// }
|
|
// '\t' => {
|
|
// // Report and continue.
|
|
// // Tabs are syntax errors, but maybe the rest of the string is fine!
|
|
// problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
|
|
// }
|
|
// '\r' => {
|
|
// // Report and continue.
|
|
// // Carriage returns aren't allowed in string literals,
|
|
// // but maybe the rest of the string is fine!
|
|
// problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
|
|
// }
|
|
// '\n' => {
|
|
// // Report and bail out.
|
|
// // We can't safely assume where the string was supposed to end.
|
|
// problems.push(loc_escaped_char(
|
|
// Problem::NewlineInLiteral,
|
|
// &state,
|
|
// buf.len(),
|
|
// ));
|
|
|
|
// return Err(unexpected_eof(
|
|
// buf.len(),
|
|
// Attempting::UnicodeEscape,
|
|
// state.clone(),
|
|
// ));
|
|
// }
|
|
// _ => {
|
|
// // Report and continue.
|
|
// // An unsupported escaped char (e.g. \q) shouldn't halt parsing.
|
|
// problems.push(loc_escaped_char(
|
|
// Problem::UnsupportedEscapedChar,
|
|
// &state,
|
|
// buf.len(),
|
|
// ));
|
|
// }
|
|
// }
|
|
|
|
// Ok(None)
|
|
// }
|
|
|
|
// #[inline(always)]
|
|
// fn handle_escaped_unicode<'a, I>(
|
|
// arena: &'a Bump,
|
|
// state: &State<'a>,
|
|
// chars: &mut Peekable<I>,
|
|
// buf: &mut String<'a>,
|
|
// problems: &mut Problems,
|
|
// ) -> Result<(), (Fail, State<'a>)>
|
|
// where
|
|
// I: Iterator<Item = char>,
|
|
// {
|
|
// // \u{00A0} is how you specify a Unicode code point,
|
|
// // so we should always see a '{' next.
|
|
// if chars.next() != Some('{') {
|
|
// let start_line = state.line;
|
|
// // +1 due to the `"` which precedes buf
|
|
// let start_col = state.column + 1 + buf.len() as u16;
|
|
// let end_line = start_line;
|
|
|
|
// // All we parsed was `\u`, so end on the column after `\`'s column.
|
|
// let end_col = start_col + 1;
|
|
|
|
// let region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line,
|
|
// end_col,
|
|
// };
|
|
|
|
// problems.push(Loc {
|
|
// region,
|
|
// value: Problem::NoUnicodeDigits,
|
|
// });
|
|
|
|
// // The rest of the string literal might be fine. Keep parsing!
|
|
// return Ok(());
|
|
// }
|
|
|
|
// // Record the point in the string literal where we started parsing `\u`
|
|
// let start_of_unicode = buf.len();
|
|
|
|
// // Stores the accumulated unicode digits
|
|
// let mut hex_str = String::new_in(arena);
|
|
|
|
// while let Some(hex_char) = chars.next() {
|
|
// match hex_char {
|
|
// '}' => {
|
|
// // Done! Validate and add it to the buffer.
|
|
// match u32::from_str_radix(&hex_str, 16) {
|
|
// Ok(code_pt) => {
|
|
// if code_pt > 0x10FFFF {
|
|
// let start_line = state.line;
|
|
// // +1 due to the `"` which precedes buf
|
|
// // +3 due to the `\u{` which precedes the hex digits
|
|
// let start_col = state.column + 1 + buf.len() as u16 + 3;
|
|
// let end_line = start_line;
|
|
|
|
// // We want to underline only the number. That's the error!
|
|
// // -1 because we want to end on the last digit, not
|
|
// // overshoot it.
|
|
// let end_col = start_col + hex_str.len() as u16 - 1;
|
|
|
|
// let region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line,
|
|
// end_col,
|
|
// };
|
|
|
|
// problems.push(Loc {
|
|
// region,
|
|
// value: Problem::UnicodeCodePtTooLarge,
|
|
// });
|
|
// } else {
|
|
// // If it all checked out, add it to
|
|
// // the main buffer.
|
|
// match char::from_u32(code_pt) {
|
|
// Some(ch) => buf.push(ch),
|
|
// None => {
|
|
// problems.push(loc_escaped_unicode(
|
|
// Problem::InvalidUnicodeCodePt,
|
|
// &state,
|
|
// start_of_unicode,
|
|
// hex_str.len(),
|
|
// ));
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
// Err(_) => {
|
|
// let problem = if hex_str.is_empty() {
|
|
// Problem::NoUnicodeDigits
|
|
// } else {
|
|
// Problem::NonHexCharsInUnicodeCodePt
|
|
// };
|
|
|
|
// problems.push(loc_escaped_unicode(
|
|
// problem,
|
|
// &state,
|
|
// start_of_unicode,
|
|
// hex_str.len(),
|
|
// ));
|
|
// }
|
|
// }
|
|
|
|
// // We are now done processing the unicode portion of the string,
|
|
// // so exit the loop without further advancing the iterator.
|
|
// return Ok(());
|
|
// }
|
|
// '\t' => {
|
|
// // Report and continue.
|
|
// // Tabs are syntax errors, but maybe the rest of the string is fine!
|
|
// problems.push(loc_escaped_unicode(
|
|
// Problem::Tab,
|
|
// &state,
|
|
// start_of_unicode,
|
|
// hex_str.len(),
|
|
// ));
|
|
// }
|
|
// '\r' => {
|
|
// // Report and continue.
|
|
// // Carriage returns aren't allowed in string literals,
|
|
// // but maybe the rest of the string is fine!
|
|
// problems.push(loc_escaped_unicode(
|
|
// Problem::CarriageReturn,
|
|
// &state,
|
|
// start_of_unicode,
|
|
// hex_str.len(),
|
|
// ));
|
|
// }
|
|
// '\n' => {
|
|
// // Report and bail out.
|
|
// // We can't safely assume where the string was supposed to end.
|
|
// problems.push(loc_escaped_unicode(
|
|
// Problem::NewlineInLiteral,
|
|
// &state,
|
|
// start_of_unicode,
|
|
// hex_str.len(),
|
|
// ));
|
|
|
|
// return Err(unexpected_eof(
|
|
// buf.len(),
|
|
// Attempting::UnicodeEscape,
|
|
// state.clone(),
|
|
// ));
|
|
// }
|
|
// normal_char => hex_str.push(normal_char),
|
|
// }
|
|
|
|
// // If we're about to hit the end of the string, and we didn't already
|
|
// // complete parsing a valid unicode escape sequence, this is a malformed
|
|
// // escape sequence - it wasn't terminated!
|
|
// if chars.peek() == Some(&'"') {
|
|
// // Record a problem and exit the loop early, so the string literal
|
|
// // parsing logic can consume the quote and do its job as normal.
|
|
// let start_line = state.line;
|
|
// // +1 due to the `"` which precedes buf.
|
|
// let start_col = state.column + buf.len() as u16 + 1;
|
|
// let end_line = start_line;
|
|
// // +3 due to the `\u{`
|
|
// // -1 to prevent overshooting because end col is inclusive.
|
|
// let end_col = start_col + 3 + hex_str.len() as u16 - 1;
|
|
|
|
// let region = Region {
|
|
// start_line,
|
|
// start_col,
|
|
// end_line,
|
|
// end_col,
|
|
// };
|
|
|
|
// problems.push(Loc {
|
|
// region,
|
|
// value: Problem::MalformedEscapedUnicode,
|
|
// });
|
|
|
|
// return Ok(());
|
|
// }
|
|
// }
|
|
|
|
// Ok(())
|
|
// }
|
|
|
|
// #[inline(always)]
|
|
// fn parse_interpolated_ident<'a, I>(
|
|
// arena: &'a Bump,
|
|
// state: &State<'a>,
|
|
// chars: &mut Peekable<I>,
|
|
// ) -> Result<&'a str, (Fail, State<'a>)>
|
|
// where
|
|
// I: Iterator<Item = char>,
|
|
// {
|
|
// // This will return Err on invalid identifiers like "if"
|
|
// let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?;
|
|
|
|
// // Make sure we got a closing ) to end the interpolation.
|
|
// match next_char {
|
|
// Some(')') => Ok(string),
|
|
// Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)),
|
|
// None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)),
|
|
// }
|
|
// }
|