Improve parsing of scalar literals

* Unify parsing of string literals and scalar literals, to (e.g.) ensure escapes are handled uniformly. Notably, this makes unicode escapes valid in scalar literals.
* Add a variety of custom error messages about specific failure cases of parsing string/scalar literals. For example, if we're expecting a string (e.g. a package name in the header) and the user tried using single quotes, give a clear message about that.
* Fix formatting of unicode escapes (they previously used {}, now correctly use () to match roc strings)
This commit is contained in:
Joshua Warner 2023-01-07 14:41:08 -08:00
parent 6fc593142d
commit 94070e8ba6
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
22 changed files with 411 additions and 173 deletions

View file

@ -2,6 +2,7 @@ use std::fmt::Debug;
use crate::header::{AppHeader, HostedHeader, InterfaceHeader, PackageHeader, PlatformHeader};
use crate::ident::Ident;
use crate::parser::ESingleQuote;
use bumpalo::collections::{String, Vec};
use bumpalo::Bump;
use roc_collections::soa::{EitherIndex, Index, Slice};
@ -116,11 +117,20 @@ pub enum StrSegment<'a> {
Interpolated(Loc<&'a Expr<'a>>), // e.g. (name) in "Hi, \(name)!"
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SingleQuoteSegment<'a> {
Plaintext(&'a str), // e.g. 'f'
Unicode(Loc<&'a str>), // e.g. '00A0' in '\u(00A0)'
EscapedChar(EscapedChar), // e.g. '\n'
// No interpolated expressions in single-quoted strings
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EscapedChar {
Newline, // \n
Tab, // \t
Quote, // \"
DoubleQuote, // \"
SingleQuote, // \'
Backslash, // \\
CarriageReturn, // \r
}
@ -132,12 +142,71 @@ impl EscapedChar {
match self {
Backslash => '\\',
Quote => '"',
SingleQuote => '\'',
DoubleQuote => '"',
CarriageReturn => 'r',
Tab => 't',
Newline => 'n',
}
}
pub fn unescape(self) -> char {
use EscapedChar::*;
match self {
Backslash => '\\',
SingleQuote => '\'',
DoubleQuote => '"',
CarriageReturn => '\r',
Tab => '\t',
Newline => '\n',
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SingleQuoteLiteral<'a> {
/// The most common case: a plain character with no escapes
PlainLine(&'a str),
Line(&'a [SingleQuoteSegment<'a>]),
}
impl<'a> SingleQuoteLiteral<'a> {
pub fn to_str_in(&self, arena: &'a Bump) -> &'a str {
match self {
SingleQuoteLiteral::PlainLine(s) => s,
SingleQuoteLiteral::Line(segments) => {
let mut s = String::new_in(arena);
for segment in *segments {
match segment {
SingleQuoteSegment::Plaintext(s2) => s.push_str(s2),
SingleQuoteSegment::Unicode(loc) => {
let s2 = loc.value;
let c = u32::from_str_radix(s2, 16).expect("Invalid unicode escape");
s.push(char::from_u32(c).expect("Invalid unicode codepoint"));
}
SingleQuoteSegment::EscapedChar(c) => {
s.push(c.unescape());
}
}
}
s.into_bump_str()
}
}
}
}
impl<'a> TryFrom<StrSegment<'a>> for SingleQuoteSegment<'a> {
type Error = ESingleQuote;
fn try_from(value: StrSegment<'a>) -> Result<Self, Self::Error> {
match value {
StrSegment::Plaintext(s) => Ok(SingleQuoteSegment::Plaintext(s)),
StrSegment::Unicode(s) => Ok(SingleQuoteSegment::Unicode(s)),
StrSegment::EscapedChar(s) => Ok(SingleQuoteSegment::EscapedChar(s)),
StrSegment::Interpolated(_) => Err(ESingleQuote::InterpolationNotAllowed),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq)]