Gracefully handle overflowing unicode literals

This commit is contained in:
Joshua Warner 2024-12-14 22:04:20 -08:00
parent a83f44188b
commit 6ef87b3b9d
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
11 changed files with 106 additions and 27 deletions

View file

@ -5,7 +5,7 @@ use crate::header::{
self, AppHeader, HostedHeader, ModuleHeader, ModuleName, PackageHeader, PlatformHeader,
};
use crate::ident::Accessor;
use crate::parser::ESingleQuote;
use crate::parser::{ESingleQuote, EString};
use bumpalo::collections::{String, Vec};
use bumpalo::Bump;
use roc_collections::soa::{index_push_new, slice_extend_new};
@ -360,9 +360,9 @@ pub enum SingleQuoteLiteral<'a> {
}
impl<'a> SingleQuoteLiteral<'a> {
pub fn to_str_in(&self, arena: &'a Bump) -> &'a str {
pub fn to_str_in(&self, arena: &'a Bump) -> Result<&'a str, EString<'a>> {
match self {
SingleQuoteLiteral::PlainLine(s) => s,
SingleQuoteLiteral::PlainLine(s) => Ok(s),
SingleQuoteLiteral::Line(segments) => {
let mut s = String::new_in(arena);
for segment in *segments {
@ -370,15 +370,19 @@ impl<'a> SingleQuoteLiteral<'a> {
SingleQuoteSegment::Plaintext(s2) => s.push_str(s2),
SingleQuoteSegment::Unicode(loc) => {
let s2 = loc.value;
let c = u32::from_str_radix(s2, 16).expect("Invalid unicode escape");
s.push(char::from_u32(c).expect("Invalid unicode codepoint"));
let c = u32::from_str_radix(s2, 16)
.map_err(|_| EString::UnicodeEscapeTooLarge(loc.region))?;
s.push(
char::from_u32(c)
.ok_or(EString::InvalidUnicodeCodepoint(loc.region))?,
);
}
SingleQuoteSegment::EscapedChar(c) => {
s.push(c.unescape());
}
}
}
s.into_bump_str()
Ok(s.into_bump_str())
}
}
}

View file

@ -3872,13 +3872,17 @@ fn apply_expr_access_chain<'a>(
}
fn string_like_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
map_with_arena(
then(
crate::string_literal::parse_str_like_literal(),
|arena, lit| match lit {
StrLikeLiteral::Str(s) => Expr::Str(s),
|arena, state, progress, lit| match lit {
StrLikeLiteral::Str(s) => Ok((progress, Expr::Str(s), state)),
StrLikeLiteral::SingleQuote(s) => {
// TODO: preserve the original escaping
Expr::SingleQuote(s.to_str_in(arena))
Ok((
progress,
Expr::SingleQuote(s.to_str_in(arena).map_err(|e| (MadeProgress, e))?),
state,
))
}
},
)

View file

@ -1162,6 +1162,12 @@ impl<'a> Normalize<'a> for EString<'a> {
EString::ExpectedDoubleQuoteGotSingleQuote(_) => {
EString::ExpectedDoubleQuoteGotSingleQuote(Position::zero())
}
EString::InvalidUnicodeCodepoint(_region) => {
EString::InvalidUnicodeCodepoint(Region::zero())
}
EString::UnicodeEscapeTooLarge(_region) => {
EString::UnicodeEscapeTooLarge(Region::zero())
}
}
}
}
@ -1248,6 +1254,7 @@ impl<'a> Normalize<'a> for EPattern<'a> {
EPattern::AsIndentStart(_) => EPattern::AsIndentStart(Position::zero()),
EPattern::AccessorFunction(_) => EPattern::AccessorFunction(Position::zero()),
EPattern::RecordUpdaterFunction(_) => EPattern::RecordUpdaterFunction(Position::zero()),
EPattern::Str(e, _) => EPattern::Str(e.normalize(arena), Position::zero()),
}
}
}

View file

@ -642,6 +642,8 @@ pub enum EString<'a> {
FormatEnd(Position),
MultilineInsufficientIndent(Position),
ExpectedDoubleQuoteGotSingleQuote(Position),
InvalidUnicodeCodepoint(Region),
UnicodeEscapeTooLarge(Region),
}
impl<'a> EString<'a> {
@ -663,6 +665,9 @@ impl<'a> EString<'a> {
| EString::FormatEnd(p)
| EString::MultilineInsufficientIndent(p)
| EString::ExpectedDoubleQuoteGotSingleQuote(p) => Region::from_pos(*p),
EString::InvalidUnicodeCodepoint(region) | EString::UnicodeEscapeTooLarge(region) => {
*region
}
}
}
}
@ -1066,6 +1071,7 @@ pub enum EPattern<'a> {
AccessorFunction(Position),
RecordUpdaterFunction(Position),
Str(EString<'a>, Position),
}
impl<'a> EPattern<'a> {
@ -1075,6 +1081,7 @@ impl<'a> EPattern<'a> {
EPattern::Record(expr, _) => expr.get_region(),
EPattern::List(expr, _) => expr.get_region(),
EPattern::PInParens(expr, _) => expr.get_region(),
EPattern::Str(e_string, _) => e_string.get_region(),
// Cases with Position values
EPattern::AsKeyword(position)

View file

@ -3,9 +3,9 @@ use crate::blankspace::{space0_e, spaces, spaces_before};
use crate::ident::{lowercase_ident, parse_ident, Accessor, Ident};
use crate::keyword;
use crate::parser::{
self, backtrackable, byte, collection_trailing_sep_e, fail_when, loc, map, map_with_arena,
optional, skip_first, specialize_err, specialize_err_ref, then, three_bytes, two_bytes,
zero_or_more, EPattern, PInParens, PList, PRecord, Parser,
self, backtrackable, byte, collection_trailing_sep_e, fail_when, loc, map, optional,
skip_first, specialize_err, specialize_err_ref, then, three_bytes, two_bytes, zero_or_more,
EPattern, PInParens, PList, PRecord, Parser,
};
use crate::parser::{either, Progress::*};
use crate::state::State;
@ -251,18 +251,25 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
}
fn string_like_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
specialize_err(
|_, pos| EPattern::Start(pos),
map_with_arena(
then(
specialize_err(
|_, pos| EPattern::Start(pos),
crate::string_literal::parse_str_like_literal(),
|arena, lit| match lit {
StrLikeLiteral::Str(s) => Pattern::StrLiteral(s),
StrLikeLiteral::SingleQuote(s) => {
// TODO: preserve the original escaping
Pattern::SingleQuote(s.to_str_in(arena))
}
},
),
|arena, state, progress, lit| match lit {
StrLikeLiteral::Str(s) => Ok((progress, Pattern::StrLiteral(s), state)),
StrLikeLiteral::SingleQuote(s) => {
// TODO: preserve the original escaping
Ok((
progress,
Pattern::SingleQuote(
s.to_str_in(arena)
.map_err(|e| (MadeProgress, EPattern::Str(e, state.pos())))?,
),
state,
))
}
},
)
}

View file

@ -295,7 +295,7 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
// -> TODO: do we want to change this?
// Simply by decoding this, it's guaranteed to be valid utf-8
let text = expr.to_str_in(arena);
let text = expr.to_str_in(arena).map_err(|e| (MadeProgress, e))?;
if text.len() > 5 {
return Err((