mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-28 22:34:45 +00:00
Canonicalize unicode code point escapes
This commit is contained in:
parent
7682e09b0a
commit
2e15443c8c
4 changed files with 125 additions and 61 deletions
|
@ -22,7 +22,7 @@ use roc_region::all::{Located, Region};
|
||||||
use roc_types::subs::{VarStore, Variable};
|
use roc_types::subs::{VarStore, Variable};
|
||||||
use roc_types::types::Alias;
|
use roc_types::types::Alias;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::i64;
|
use std::{char, i64, u32};
|
||||||
|
|
||||||
#[derive(Clone, Default, Debug, PartialEq)]
|
#[derive(Clone, Default, Debug, PartialEq)]
|
||||||
pub struct Output {
|
pub struct Output {
|
||||||
|
@ -1359,7 +1359,7 @@ fn flatten_str_lines<'a>(
|
||||||
use ast::StrSegment::*;
|
use ast::StrSegment::*;
|
||||||
|
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
let mut interpolations = Vec::new();
|
let mut segments = Vec::new();
|
||||||
let mut output = Output::default();
|
let mut output = Output::default();
|
||||||
|
|
||||||
for line in lines {
|
for line in lines {
|
||||||
|
@ -1368,11 +1368,41 @@ fn flatten_str_lines<'a>(
|
||||||
Plaintext(string) => {
|
Plaintext(string) => {
|
||||||
buf.push_str(string);
|
buf.push_str(string);
|
||||||
}
|
}
|
||||||
Unicode(loc_digits) => {
|
Unicode(loc_hex_digits) => match u32::from_str_radix(loc_hex_digits.value, 16) {
|
||||||
todo!("parse unicode digits {:?}", loc_digits);
|
Ok(code_pt) => match char::from_u32(code_pt) {
|
||||||
}
|
Some(ch) => {
|
||||||
|
buf.push(ch);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
env.problem(Problem::InvalidUnicodeCodePoint(loc_hex_digits.region));
|
||||||
|
|
||||||
|
return (
|
||||||
|
Expr::RuntimeError(RuntimeError::InvalidUnicodeCodePoint(
|
||||||
|
loc_hex_digits.region,
|
||||||
|
)),
|
||||||
|
output,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
env.problem(Problem::InvalidHexadecimal(loc_hex_digits.region));
|
||||||
|
|
||||||
|
return (
|
||||||
|
Expr::RuntimeError(RuntimeError::InvalidHexadecimal(
|
||||||
|
loc_hex_digits.region,
|
||||||
|
)),
|
||||||
|
output,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
},
|
||||||
Interpolated(loc_expr) => {
|
Interpolated(loc_expr) => {
|
||||||
if is_valid_interpolation(loc_expr.value) {
|
if is_valid_interpolation(loc_expr.value) {
|
||||||
|
if !buf.is_empty() {
|
||||||
|
segments.push(StrSegment::Plaintext(buf.into()));
|
||||||
|
|
||||||
|
buf = String::new();
|
||||||
|
}
|
||||||
|
|
||||||
let (loc_expr, new_output) = canonicalize_expr(
|
let (loc_expr, new_output) = canonicalize_expr(
|
||||||
env,
|
env,
|
||||||
var_store,
|
var_store,
|
||||||
|
@ -1383,7 +1413,7 @@ fn flatten_str_lines<'a>(
|
||||||
|
|
||||||
output.union(new_output);
|
output.union(new_output);
|
||||||
|
|
||||||
interpolations.push(StrSegment::Interpolation(loc_expr));
|
segments.push(StrSegment::Interpolation(loc_expr));
|
||||||
} else {
|
} else {
|
||||||
env.problem(Problem::InvalidInterpolation(loc_expr.region));
|
env.problem(Problem::InvalidInterpolation(loc_expr.region));
|
||||||
|
|
||||||
|
@ -1398,7 +1428,11 @@ fn flatten_str_lines<'a>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(Expr::Str(interpolations), output)
|
if !buf.is_empty() {
|
||||||
|
segments.push(StrSegment::Plaintext(buf.into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
(Expr::Str(segments), output)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the char that would have been originally parsed to
|
/// Returns the char that would have been originally parsed to
|
||||||
|
|
|
@ -15,7 +15,7 @@ mod test_can {
|
||||||
use crate::helpers::{can_expr_with, test_home, CanExprOut};
|
use crate::helpers::{can_expr_with, test_home, CanExprOut};
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use roc_can::expr::Expr::{self, *};
|
use roc_can::expr::Expr::{self, *};
|
||||||
use roc_can::expr::Recursive;
|
use roc_can::expr::{Recursive, StrSegment};
|
||||||
use roc_problem::can::{FloatErrorKind, IntErrorKind, Problem, RuntimeError};
|
use roc_problem::can::{FloatErrorKind, IntErrorKind, Problem, RuntimeError};
|
||||||
use roc_region::all::Region;
|
use roc_region::all::Region;
|
||||||
use std::{f64, i64};
|
use std::{f64, i64};
|
||||||
|
@ -69,6 +69,10 @@ mod test_can {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test helper: wraps `contents` in an `Expr::Str` holding exactly one
/// `StrSegment::Plaintext` segment, i.e. the expected shape for a string
/// literal with no interpolations.
fn expr_str(contents: &str) -> Expr {
|
||||||
|
Expr::Str(vec![StrSegment::Plaintext(contents.into())])
|
||||||
|
}
|
||||||
|
|
||||||
// NUMBER LITERALS
|
// NUMBER LITERALS
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1179,62 +1183,61 @@ mod test_can {
|
||||||
//}
|
//}
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
//// STRING LITERALS
|
// STRING LITERALS
|
||||||
|
|
||||||
//
|
// Valid `\u(...)` escapes: the Roc source on the left uses the `\u(HEX)` form,
// and the expected value on the right is a single plaintext segment (built by
// `expr_str`) containing the corresponding character, written with Rust's own
// `\u{HEX}` escape. NOTE(review): `assert_can` is defined outside this view;
// presumably it canonicalizes the source and compares the resulting expr.
#[test]
|
||||||
// #[test]
|
fn string_with_valid_unicode_escapes() {
|
||||||
// fn string_with_valid_unicode_escapes() {
|
assert_can(r#""x\u(00A0)x""#, expr_str("x\u{00A0}x"));
|
||||||
// expect_parsed_str("x\u{00A0}x", r#""x\u{00A0}x""#);
|
assert_can(r#""x\u(101010)x""#, expr_str("x\u{101010}x"));
|
||||||
// expect_parsed_str("x\u{101010}x", r#""x\u{101010}x""#);
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_too_large_unicode_escape() {
|
// fn string_with_too_large_unicode_escape() {
|
||||||
// // Should be too big - max size should be 10FFFF.
|
// // Should be too big - max size should be 10FFFF.
|
||||||
// // (Rust has this restriction. I assume it's a good idea.)
|
// // (Rust has this restriction. I assume it's a good idea.)
|
||||||
// assert_malformed_str(
|
// assert_malformed_str(
|
||||||
// r#""abc\u{110000}def""#,
|
// r#""abc\u{110000}def""#,
|
||||||
// vec![Located::new(0, 7, 0, 12, Problem::UnicodeCodePointTooLarge)],
|
// vec![Located::new(0, 7, 0, 12, Problem::UnicodeCodePointTooLarge)],
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_no_unicode_digits() {
|
// fn string_with_no_unicode_digits() {
|
||||||
// // No digits specified
|
// // No digits specified
|
||||||
// assert_malformed_str(
|
// assert_malformed_str(
|
||||||
// r#""blah\u{}foo""#,
|
// r#""blah\u{}foo""#,
|
||||||
// vec![Located::new(0, 5, 0, 8, Problem::NoUnicodeDigits)],
|
// vec![Located::new(0, 5, 0, 8, Problem::NoUnicodeDigits)],
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_no_unicode_opening_brace() {
|
// fn string_with_no_unicode_opening_brace() {
|
||||||
// // No opening curly brace. It can't be sure if the closing brace
|
// // No opening curly brace. It can't be sure if the closing brace
|
||||||
// // was intended to be a closing brace for the unicode escape, so
|
// // was intended to be a closing brace for the unicode escape, so
|
||||||
// // report that there were no digits specified.
|
// // report that there were no digits specified.
|
||||||
// assert_malformed_str(
|
// assert_malformed_str(
|
||||||
// r#""abc\u00A0}def""#,
|
// r#""abc\u00A0}def""#,
|
||||||
// vec![Located::new(0, 4, 0, 5, Problem::NoUnicodeDigits)],
|
// vec![Located::new(0, 4, 0, 5, Problem::NoUnicodeDigits)],
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_no_unicode_closing_brace() {
|
// fn string_with_no_unicode_closing_brace() {
|
||||||
// // No closing curly brace
|
// // No closing curly brace
|
||||||
// assert_malformed_str(
|
// assert_malformed_str(
|
||||||
// r#""blah\u{stuff""#,
|
// r#""blah\u{stuff""#,
|
||||||
// vec![Located::new(0, 5, 0, 12, Problem::MalformedEscapedUnicode)],
|
// vec![Located::new(0, 5, 0, 12, Problem::MalformedEscapedUnicode)],
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_no_unicode_braces() {
|
// fn string_with_no_unicode_braces() {
|
||||||
// // No curly braces
|
// // No curly braces
|
||||||
// assert_malformed_str(
|
// assert_malformed_str(
|
||||||
// r#""zzzz\uzzzzz""#,
|
// r#""zzzz\uzzzzz""#,
|
||||||
// vec![Located::new(0, 5, 0, 6, Problem::NoUnicodeDigits)],
|
// vec![Located::new(0, 5, 0, 6, Problem::NoUnicodeDigits)],
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_with_escaped_interpolation() {
|
// fn string_with_escaped_interpolation() {
|
||||||
|
@ -1242,13 +1245,12 @@ mod test_can {
|
||||||
// // This should NOT be string interpolation, because of the \\
|
// // This should NOT be string interpolation, because of the \\
|
||||||
// indoc!(
|
// indoc!(
|
||||||
// r#"
|
// r#"
|
||||||
// "abcd\\(efg)hij"
|
// "abcd\\(efg)hij"
|
||||||
// "#
|
// "#
|
||||||
// ),
|
// ),
|
||||||
// Str(r#"abcd\(efg)hij"#.into()),
|
// Str(r#"abcd\(efg)hij"#.into()),
|
||||||
// );
|
// );
|
||||||
// }
|
// }
|
||||||
//
|
|
||||||
|
|
||||||
// #[test]
|
// #[test]
|
||||||
// fn string_without_escape() {
|
// fn string_without_escape() {
|
||||||
|
|
|
@ -56,6 +56,8 @@ pub enum Problem {
|
||||||
region: Region,
|
region: Region,
|
||||||
},
|
},
|
||||||
InvalidInterpolation(Region),
|
InvalidInterpolation(Region),
|
||||||
|
InvalidHexadecimal(Region),
|
||||||
|
InvalidUnicodeCodePoint(Region),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
@ -127,6 +129,8 @@ pub enum RuntimeError {
|
||||||
NonExhaustivePattern,
|
NonExhaustivePattern,
|
||||||
|
|
||||||
InvalidInterpolation(Region),
|
InvalidInterpolation(Region),
|
||||||
|
InvalidHexadecimal(Region),
|
||||||
|
InvalidUnicodeCodePoint(Region),
|
||||||
|
|
||||||
/// When the author specifies a type annotation but no implementation
|
/// When the author specifies a type annotation but no implementation
|
||||||
NoImplementation,
|
NoImplementation,
|
||||||
|
|
|
@ -262,6 +262,18 @@ pub fn can_problem<'b>(
|
||||||
alloc.reflow(" can occur in this position."),
|
alloc.reflow(" can occur in this position."),
|
||||||
]),
|
]),
|
||||||
]),
|
]),
|
||||||
|
Problem::InvalidHexadecimal(region) => {
|
||||||
|
todo!(
|
||||||
|
"TODO report an invalid hexadecimal number in a \\u(...) code point at region {:?}",
|
||||||
|
region
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Problem::InvalidUnicodeCodePoint(region) => {
|
||||||
|
todo!(
|
||||||
|
"TODO report an invalid \\u(...) code point at region {:?}",
|
||||||
|
region
|
||||||
|
);
|
||||||
|
}
|
||||||
Problem::InvalidInterpolation(region) => {
|
Problem::InvalidInterpolation(region) => {
|
||||||
todo!(
|
todo!(
|
||||||
"TODO report an invalid string interpolation at region {:?}",
|
"TODO report an invalid string interpolation at region {:?}",
|
||||||
|
@ -530,6 +542,18 @@ fn pretty_runtime_error<'b>(
|
||||||
alloc.region(region),
|
alloc.region(region),
|
||||||
alloc.reflow("Only variables can be updated with record update syntax."),
|
alloc.reflow("Only variables can be updated with record update syntax."),
|
||||||
]),
|
]),
|
||||||
|
RuntimeError::InvalidHexadecimal(region) => {
|
||||||
|
todo!(
|
||||||
|
"TODO runtime error for an invalid hexadecimal number in a \\u(...) code point at region {:?}",
|
||||||
|
region
|
||||||
|
);
|
||||||
|
}
|
||||||
|
RuntimeError::InvalidUnicodeCodePoint(region) => {
|
||||||
|
todo!(
|
||||||
|
"TODO runtime error for an invalid \\u(...) code point at region {:?}",
|
||||||
|
region
|
||||||
|
);
|
||||||
|
}
|
||||||
RuntimeError::InvalidInterpolation(region) => {
|
RuntimeError::InvalidInterpolation(region) => {
|
||||||
todo!(
|
todo!(
|
||||||
"TODO runtime error for an invalid string interpolation at region {:?}",
|
"TODO runtime error for an invalid string interpolation at region {:?}",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue