mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-26 13:29:12 +00:00
Gracefully handle overflowing unicode literals
This commit is contained in:
parent
a83f44188b
commit
6ef87b3b9d
11 changed files with 106 additions and 27 deletions
|
@ -5,7 +5,7 @@ use crate::header::{
|
|||
self, AppHeader, HostedHeader, ModuleHeader, ModuleName, PackageHeader, PlatformHeader,
|
||||
};
|
||||
use crate::ident::Accessor;
|
||||
use crate::parser::ESingleQuote;
|
||||
use crate::parser::{ESingleQuote, EString};
|
||||
use bumpalo::collections::{String, Vec};
|
||||
use bumpalo::Bump;
|
||||
use roc_collections::soa::{index_push_new, slice_extend_new};
|
||||
|
@ -360,9 +360,9 @@ pub enum SingleQuoteLiteral<'a> {
|
|||
}
|
||||
|
||||
impl<'a> SingleQuoteLiteral<'a> {
|
||||
pub fn to_str_in(&self, arena: &'a Bump) -> &'a str {
|
||||
pub fn to_str_in(&self, arena: &'a Bump) -> Result<&'a str, EString<'a>> {
|
||||
match self {
|
||||
SingleQuoteLiteral::PlainLine(s) => s,
|
||||
SingleQuoteLiteral::PlainLine(s) => Ok(s),
|
||||
SingleQuoteLiteral::Line(segments) => {
|
||||
let mut s = String::new_in(arena);
|
||||
for segment in *segments {
|
||||
|
@ -370,15 +370,19 @@ impl<'a> SingleQuoteLiteral<'a> {
|
|||
SingleQuoteSegment::Plaintext(s2) => s.push_str(s2),
|
||||
SingleQuoteSegment::Unicode(loc) => {
|
||||
let s2 = loc.value;
|
||||
let c = u32::from_str_radix(s2, 16).expect("Invalid unicode escape");
|
||||
s.push(char::from_u32(c).expect("Invalid unicode codepoint"));
|
||||
let c = u32::from_str_radix(s2, 16)
|
||||
.map_err(|_| EString::UnicodeEscapeTooLarge(loc.region))?;
|
||||
s.push(
|
||||
char::from_u32(c)
|
||||
.ok_or(EString::InvalidUnicodeCodepoint(loc.region))?,
|
||||
);
|
||||
}
|
||||
SingleQuoteSegment::EscapedChar(c) => {
|
||||
s.push(c.unescape());
|
||||
}
|
||||
}
|
||||
}
|
||||
s.into_bump_str()
|
||||
Ok(s.into_bump_str())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3872,13 +3872,17 @@ fn apply_expr_access_chain<'a>(
|
|||
}
|
||||
|
||||
fn string_like_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EString<'a>> {
|
||||
map_with_arena(
|
||||
then(
|
||||
crate::string_literal::parse_str_like_literal(),
|
||||
|arena, lit| match lit {
|
||||
StrLikeLiteral::Str(s) => Expr::Str(s),
|
||||
|arena, state, progress, lit| match lit {
|
||||
StrLikeLiteral::Str(s) => Ok((progress, Expr::Str(s), state)),
|
||||
StrLikeLiteral::SingleQuote(s) => {
|
||||
// TODO: preserve the original escaping
|
||||
Expr::SingleQuote(s.to_str_in(arena))
|
||||
Ok((
|
||||
progress,
|
||||
Expr::SingleQuote(s.to_str_in(arena).map_err(|e| (MadeProgress, e))?),
|
||||
state,
|
||||
))
|
||||
}
|
||||
},
|
||||
)
|
||||
|
|
|
@ -1162,6 +1162,12 @@ impl<'a> Normalize<'a> for EString<'a> {
|
|||
EString::ExpectedDoubleQuoteGotSingleQuote(_) => {
|
||||
EString::ExpectedDoubleQuoteGotSingleQuote(Position::zero())
|
||||
}
|
||||
EString::InvalidUnicodeCodepoint(_region) => {
|
||||
EString::InvalidUnicodeCodepoint(Region::zero())
|
||||
}
|
||||
EString::UnicodeEscapeTooLarge(_region) => {
|
||||
EString::UnicodeEscapeTooLarge(Region::zero())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1248,6 +1254,7 @@ impl<'a> Normalize<'a> for EPattern<'a> {
|
|||
EPattern::AsIndentStart(_) => EPattern::AsIndentStart(Position::zero()),
|
||||
EPattern::AccessorFunction(_) => EPattern::AccessorFunction(Position::zero()),
|
||||
EPattern::RecordUpdaterFunction(_) => EPattern::RecordUpdaterFunction(Position::zero()),
|
||||
EPattern::Str(e, _) => EPattern::Str(e.normalize(arena), Position::zero()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -642,6 +642,8 @@ pub enum EString<'a> {
|
|||
FormatEnd(Position),
|
||||
MultilineInsufficientIndent(Position),
|
||||
ExpectedDoubleQuoteGotSingleQuote(Position),
|
||||
InvalidUnicodeCodepoint(Region),
|
||||
UnicodeEscapeTooLarge(Region),
|
||||
}
|
||||
|
||||
impl<'a> EString<'a> {
|
||||
|
@ -663,6 +665,9 @@ impl<'a> EString<'a> {
|
|||
| EString::FormatEnd(p)
|
||||
| EString::MultilineInsufficientIndent(p)
|
||||
| EString::ExpectedDoubleQuoteGotSingleQuote(p) => Region::from_pos(*p),
|
||||
EString::InvalidUnicodeCodepoint(region) | EString::UnicodeEscapeTooLarge(region) => {
|
||||
*region
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1066,6 +1071,7 @@ pub enum EPattern<'a> {
|
|||
|
||||
AccessorFunction(Position),
|
||||
RecordUpdaterFunction(Position),
|
||||
Str(EString<'a>, Position),
|
||||
}
|
||||
|
||||
impl<'a> EPattern<'a> {
|
||||
|
@ -1075,6 +1081,7 @@ impl<'a> EPattern<'a> {
|
|||
EPattern::Record(expr, _) => expr.get_region(),
|
||||
EPattern::List(expr, _) => expr.get_region(),
|
||||
EPattern::PInParens(expr, _) => expr.get_region(),
|
||||
EPattern::Str(e_string, _) => e_string.get_region(),
|
||||
|
||||
// Cases with Position values
|
||||
EPattern::AsKeyword(position)
|
||||
|
|
|
@ -3,9 +3,9 @@ use crate::blankspace::{space0_e, spaces, spaces_before};
|
|||
use crate::ident::{lowercase_ident, parse_ident, Accessor, Ident};
|
||||
use crate::keyword;
|
||||
use crate::parser::{
|
||||
self, backtrackable, byte, collection_trailing_sep_e, fail_when, loc, map, map_with_arena,
|
||||
optional, skip_first, specialize_err, specialize_err_ref, then, three_bytes, two_bytes,
|
||||
zero_or_more, EPattern, PInParens, PList, PRecord, Parser,
|
||||
self, backtrackable, byte, collection_trailing_sep_e, fail_when, loc, map, optional,
|
||||
skip_first, specialize_err, specialize_err_ref, then, three_bytes, two_bytes, zero_or_more,
|
||||
EPattern, PInParens, PList, PRecord, Parser,
|
||||
};
|
||||
use crate::parser::{either, Progress::*};
|
||||
use crate::state::State;
|
||||
|
@ -251,18 +251,25 @@ fn number_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
|||
}
|
||||
|
||||
fn string_like_pattern_help<'a>() -> impl Parser<'a, Pattern<'a>, EPattern<'a>> {
|
||||
specialize_err(
|
||||
|_, pos| EPattern::Start(pos),
|
||||
map_with_arena(
|
||||
then(
|
||||
specialize_err(
|
||||
|_, pos| EPattern::Start(pos),
|
||||
crate::string_literal::parse_str_like_literal(),
|
||||
|arena, lit| match lit {
|
||||
StrLikeLiteral::Str(s) => Pattern::StrLiteral(s),
|
||||
StrLikeLiteral::SingleQuote(s) => {
|
||||
// TODO: preserve the original escaping
|
||||
Pattern::SingleQuote(s.to_str_in(arena))
|
||||
}
|
||||
},
|
||||
),
|
||||
|arena, state, progress, lit| match lit {
|
||||
StrLikeLiteral::Str(s) => Ok((progress, Pattern::StrLiteral(s), state)),
|
||||
StrLikeLiteral::SingleQuote(s) => {
|
||||
// TODO: preserve the original escaping
|
||||
Ok((
|
||||
progress,
|
||||
Pattern::SingleQuote(
|
||||
s.to_str_in(arena)
|
||||
.map_err(|e| (MadeProgress, EPattern::Str(e, state.pos())))?,
|
||||
),
|
||||
state,
|
||||
))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
@ -295,7 +295,7 @@ pub fn parse_str_like_literal<'a>() -> impl Parser<'a, StrLikeLiteral<'a>, EStri
|
|||
// -> TODO: do we want to change this?
|
||||
|
||||
// Simply by decoding this, it's guaranteed to be valid utf-8
|
||||
let text = expr.to_str_in(arena);
|
||||
let text = expr.to_str_in(arena).map_err(|e| (MadeProgress, e))?;
|
||||
|
||||
if text.len() > 5 {
|
||||
return Err((
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue