roc/crates/compiler/can/src/string.rs

445 lines
15 KiB
Rust

// use bumpalo::collections::string::String;
// use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_parse::ast::Expr;
// use roc_parse::ast::{Attempting, Expr};
// use roc_parse::ident;
// use roc_parse::parser::{unexpected, unexpected_eof, Fail, Parser, State};
// use roc_parse::problems::{Problem, Problems};
// use roc_region::all::{Loc, Region};
use roc_region::all::Region;
// use std::char;
// use std::iter::Peekable;
pub fn canonical_string_literal<'a>(_arena: &Bump, _raw: &'a str, _region: Region) -> Expr<'a> {
panic!("TODO restore canonicalization");
}
// let mut problems = std::vec::Vec::new();
// // Stores the accumulated string characters
// let mut buf = String::new_in(arena);
// // This caches the total string length of interpolated_pairs. Every
// // time we add a new pair to interpolated_pairs, we increment this
// // by the sum of whatever we parsed in order to obtain that pair.
// let mut buf_col_offset: usize = 0;
// // Stores interpolated identifiers, if any.
// let mut interpolated_pairs = Vec::new_in(arena);
// let mut chars = raw.chars();
// while let Some(ch) = chars.next() {
// match ch {
// // If it's a backslash, escape things.
// '\\' => match chars.next() {
// Some(next_ch) => {
// if let Some(ident) = handle_escaped_char(
// arena,
// &state,
// next_ch,
// &mut chars,
// &mut buf,
// &mut problems,
// )? {
// let expr = Expr::Var(ident);
// // +2 for `\(` and then another +1 for `)` at the end
// let parsed_length = buf.len() + 2 + ident.len() + 1;
// // Casting should always succeed in this section, because
// // if this string literal overflowed our maximum
// // line length, that would have already happened back
// // in the parsing step, and we never would have reached
// // this code. Still, debug_assert that they won't!
// debug_assert!(buf_col_offset <= u16::MAX as usize);
// debug_assert!(ident.len() <= u16::MAX as usize);
// debug_assert!((parsed_length - ident.len() - 1) <= u16::MAX as usize);
// let start_line = state.line;
// // Subtract ident length and another 1 for the `)`
// let start_col = state.column
// + buf_col_offset as u16
// + (parsed_length - ident.len() - 1) as u16;
// let ident_region = Region {
// start_line,
// start_col,
// end_line: start_line,
// end_col: start_col + ident.len() as u16 - 1,
// };
// let loc_expr = Loc {
// region: ident_region,
// value: expr,
// };
// // Push the accumulated string into the pairs list,
// // along with the ident that came after it.
// interpolated_pairs.push((buf.into_bump_str(), loc_expr));
// // Reset the buffer so we start working on a new string.
// buf = String::new_in(arena);
// // Advance the cached offset of how many chars we've parsed,
// // so the next time we see an interpolated ident, we can
// // correctly calculate its region.
// buf_col_offset += parsed_length;
// }
// }
// None => {
// problems.push(loc_char(Problem::TrailingBackslash, &state, buf.len()));
// }
// },
// '\t' => {
// // Tabs are syntax errors.
// problems.push(loc_char(Problem::Tab, &state, buf.len()));
// }
// '\r' => {
// // Carriage returns aren't allowed in string literals.
// problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
// }
// normal_char => buf.push(normal_char),
// }
// }
// // We ran out of characters; this is the end of the string!
// if problems.is_empty() {
// let final_str = buf.into_bump_str();
// if interpolated_pairs.is_empty() {
// Expr::Str(final_str)
// } else {
// let tuple_ref = arena.alloc((interpolated_pairs.into_bump_slice(), final_str));
// Expr::InterpolatedStr(tuple_ref)
// }
// } else {
// Expr::MalformedStr(problems.into_boxed_slice())
// }
// }
// fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Located<V> {
// let start_line = state.line;
// let start_col = state.column + buf_len as u16;
// let end_line = start_line;
// // All invalid chars should have a length of 1
// let end_col = state.column + 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// Loc { region, value }
// }
// fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Located<V> {
// let start_line = state.line;
// let start_col = state.column + buf_len as u16;
// let end_line = start_line;
// // escapes should all be 2 chars long
// let end_col = state.column + 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// Loc { region, value }
// }
// fn loc_escaped_unicode<'a, V>(
// value: V,
// state: &State<'a>,
// buf_len: usize,
// hex_str_len: usize,
// ) -> Located<V> {
// let start_line = state.line;
// // +1 due to the `"` which precedes buf.
// let start_col = state.column + buf_len as u16 + 1;
// let end_line = start_line;
// // +3 due to the `\u{` and another + 1 due to the `}`
// // -1 to prevent overshooting because end col is inclusive.
// let end_col = start_col + 3 + hex_str_len as u16 + 1 - 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// Loc { region, value }
// }
// #[inline(always)]
// fn handle_escaped_char<'a, I>(
// arena: &'a Bump,
// state: &State<'a>,
// ch: char,
// chars: &mut Peekable<I>,
// buf: &mut String<'a>,
// problems: &mut Problems,
// ) -> Result<Option<&'a str>, (Fail, State<'a>)>
// where
// I: Iterator<Item = char>,
// {
// match ch {
// '\\' => buf.push('\\'),
// '"' => buf.push('"'),
// 't' => buf.push('\t'),
// 'n' => buf.push('\n'),
// 'r' => buf.push('\r'),
// '0' => buf.push('\0'), // We explicitly support null characters, as we
// // can't be sure we won't receive them from Rust.
// 'u' => handle_escaped_unicode(arena, &state, chars, buf, problems)?,
// '(' => {
// let ident = parse_interpolated_ident(arena, state, chars)?;
// return Ok(Some(ident));
// }
// '\t' => {
// // Report and continue.
// // Tabs are syntax errors, but maybe the rest of the string is fine!
// problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
// }
// '\r' => {
// // Report and continue.
// // Carriage returns aren't allowed in string literals,
// // but maybe the rest of the string is fine!
// problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
// }
// '\n' => {
// // Report and bail out.
// // We can't safely assume where the string was supposed to end.
// problems.push(loc_escaped_char(
// Problem::NewlineInLiteral,
// &state,
// buf.len(),
// ));
// return Err(unexpected_eof(
// buf.len(),
// Attempting::UnicodeEscape,
// state.clone(),
// ));
// }
// _ => {
// // Report and continue.
// // An unsupported escaped char (e.g. \q) shouldn't halt parsing.
// problems.push(loc_escaped_char(
// Problem::UnsupportedEscapedChar,
// &state,
// buf.len(),
// ));
// }
// }
// Ok(None)
// }
// #[inline(always)]
// fn handle_escaped_unicode<'a, I>(
// arena: &'a Bump,
// state: &State<'a>,
// chars: &mut Peekable<I>,
// buf: &mut String<'a>,
// problems: &mut Problems,
// ) -> Result<(), (Fail, State<'a>)>
// where
// I: Iterator<Item = char>,
// {
// // \u{00A0} is how you specify a Unicode code point,
// // so we should always see a '{' next.
// if chars.next() != Some('{') {
// let start_line = state.line;
// // +1 due to the `"` which precedes buf
// let start_col = state.column + 1 + buf.len() as u16;
// let end_line = start_line;
// // All we parsed was `\u`, so end on the column after `\`'s column.
// let end_col = start_col + 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// problems.push(Loc {
// region,
// value: Problem::NoUnicodeDigits,
// });
// // The rest of the string literal might be fine. Keep parsing!
// return Ok(());
// }
// // Record the point in the string literal where we started parsing `\u`
// let start_of_unicode = buf.len();
// // Stores the accumulated unicode digits
// let mut hex_str = String::new_in(arena);
// while let Some(hex_char) = chars.next() {
// match hex_char {
// '}' => {
// // Done! Validate and add it to the buffer.
// match u32::from_str_radix(&hex_str, 16) {
// Ok(code_pt) => {
// if code_pt > 0x10FFFF {
// let start_line = state.line;
// // +1 due to the `"` which precedes buf
// // +3 due to the `\u{` which precedes the hex digits
// let start_col = state.column + 1 + buf.len() as u16 + 3;
// let end_line = start_line;
// // We want to underline only the number. That's the error!
// // -1 because we want to end on the last digit, not
// // overshoot it.
// let end_col = start_col + hex_str.len() as u16 - 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// problems.push(Loc {
// region,
// value: Problem::UnicodeCodePtTooLarge,
// });
// } else {
// // If it all checked out, add it to
// // the main buffer.
// match char::from_u32(code_pt) {
// Some(ch) => buf.push(ch),
// None => {
// problems.push(loc_escaped_unicode(
// Problem::InvalidUnicodeCodePt,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// }
// }
// }
// Err(_) => {
// let problem = if hex_str.is_empty() {
// Problem::NoUnicodeDigits
// } else {
// Problem::NonHexCharsInUnicodeCodePt
// };
// problems.push(loc_escaped_unicode(
// problem,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// }
// // We are now done processing the unicode portion of the string,
// // so exit the loop without further advancing the iterator.
// return Ok(());
// }
// '\t' => {
// // Report and continue.
// // Tabs are syntax errors, but maybe the rest of the string is fine!
// problems.push(loc_escaped_unicode(
// Problem::Tab,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// '\r' => {
// // Report and continue.
// // Carriage returns aren't allowed in string literals,
// // but maybe the rest of the string is fine!
// problems.push(loc_escaped_unicode(
// Problem::CarriageReturn,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// }
// '\n' => {
// // Report and bail out.
// // We can't safely assume where the string was supposed to end.
// problems.push(loc_escaped_unicode(
// Problem::NewlineInLiteral,
// &state,
// start_of_unicode,
// hex_str.len(),
// ));
// return Err(unexpected_eof(
// buf.len(),
// Attempting::UnicodeEscape,
// state.clone(),
// ));
// }
// normal_char => hex_str.push(normal_char),
// }
// // If we're about to hit the end of the string, and we didn't already
// // complete parsing a valid unicode escape sequence, this is a malformed
// // escape sequence - it wasn't terminated!
// if chars.peek() == Some(&'"') {
// // Record a problem and exit the loop early, so the string literal
// // parsing logic can consume the quote and do its job as normal.
// let start_line = state.line;
// // +1 due to the `"` which precedes buf.
// let start_col = state.column + buf.len() as u16 + 1;
// let end_line = start_line;
// // +3 due to the `\u{`
// // -1 to prevent overshooting because end col is inclusive.
// let end_col = start_col + 3 + hex_str.len() as u16 - 1;
// let region = Region {
// start_line,
// start_col,
// end_line,
// end_col,
// };
// problems.push(Loc {
// region,
// value: Problem::MalformedEscapedUnicode,
// });
// return Ok(());
// }
// }
// Ok(())
// }
// #[inline(always)]
// fn parse_interpolated_ident<'a, I>(
// arena: &'a Bump,
// state: &State<'a>,
// chars: &mut Peekable<I>,
// ) -> Result<&'a str, (Fail, State<'a>)>
// where
// I: Iterator<Item = char>,
// {
// // This will return Err on invalid identifiers like "if"
// let ((string, next_char), state) = ident::parse_into(arena, chars, state.clone())?;
// // Make sure we got a closing ) to end the interpolation.
// match next_char {
// Some(')') => Ok(string),
// Some(ch) => Err(unexpected(ch, 0, state, Attempting::InterpolatedString)),
// None => Err(unexpected_eof(0, Attempting::InterpolatedString, state)),
// }
// }