mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-30 23:31:12 +00:00
Fix some line length and attempting issues.
This commit is contained in:
parent
2ad510ba77
commit
dfd1d4bbb4
10 changed files with 430 additions and 271 deletions
|
@ -114,4 +114,5 @@ pub enum Attempting {
|
|||
NumberLiteral,
|
||||
UnicodeEscape,
|
||||
Expression,
|
||||
Module,
|
||||
}
|
||||
|
|
|
@ -4,13 +4,14 @@ pub mod parser;
|
|||
pub mod problems;
|
||||
pub mod string_literal;
|
||||
|
||||
use parse::ast::Expr;
|
||||
use parse::ast::{Attempting, Expr};
|
||||
use parse::number_literal::number_literal;
|
||||
use parse::parser::Parser;
|
||||
use parse::parser::{attempt, one_of2, Parser};
|
||||
use parse::string_literal::string_literal;
|
||||
|
||||
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||
parser::one_of2(string_literal(), number_literal())
|
||||
attempt(
|
||||
Attempting::Expression,
|
||||
one_of2(number_literal(), string_literal()),
|
||||
)
|
||||
}
|
||||
|
||||
const KW_IF: &'static str = "if";
|
||||
|
|
|
@ -121,7 +121,7 @@ where
|
|||
};
|
||||
|
||||
let total_chars_parsed = before_decimal.len() + chars_skipped;
|
||||
let state = state.advance_without_indenting(total_chars_parsed);
|
||||
let state = state.advance_without_indenting(total_chars_parsed)?;
|
||||
|
||||
Ok((expr, state))
|
||||
}
|
||||
|
|
|
@ -49,19 +49,23 @@ impl<'a> State<'a> {
|
|||
|
||||
/// Increments the line, then resets column, indent_col, and is_indenting.
|
||||
/// This does *not* advance the input.
|
||||
pub fn newline(&self) -> Self {
|
||||
let line = self
|
||||
.line
|
||||
.checked_add(1)
|
||||
.unwrap_or_else(panic_max_line_count_exceeded);
|
||||
|
||||
State {
|
||||
pub fn newline(&self) -> Result<Self, (Fail, Self)> {
|
||||
match self.line.checked_add(1) {
|
||||
Some(line) => Ok(State {
|
||||
input: self.input,
|
||||
line,
|
||||
column: 0,
|
||||
indent_col: 1,
|
||||
is_indenting: true,
|
||||
attempting: self.attempting,
|
||||
}),
|
||||
None => Err((
|
||||
Fail {
|
||||
reason: FailReason::TooManyLines,
|
||||
attempting: self.attempting,
|
||||
},
|
||||
self.clone(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,16 +73,10 @@ impl<'a> State<'a> {
|
|||
/// This assumes we are *not* advancing with spaces, or at least that
|
||||
/// any spaces on the line were preceded by non-spaces - which would mean
|
||||
/// they weren't eligible to indent anyway.
|
||||
pub fn advance_without_indenting(&self, quantity: usize) -> Self {
|
||||
let column_usize = (self.column as usize)
|
||||
.checked_add(quantity)
|
||||
.unwrap_or_else(panic_max_line_length_exceeded);
|
||||
|
||||
if column_usize > std::u16::MAX as usize {
|
||||
panic_max_line_length_exceeded();
|
||||
}
|
||||
|
||||
State {
|
||||
pub fn advance_without_indenting(&self, quantity: usize) -> Result<Self, (Fail, Self)> {
|
||||
match (self.column as usize).checked_add(quantity) {
|
||||
Some(column_usize) if column_usize <= std::u16::MAX as usize => {
|
||||
Ok(State {
|
||||
input: &self.input[quantity..],
|
||||
line: self.line,
|
||||
column: column_usize as u16,
|
||||
|
@ -86,22 +84,22 @@ impl<'a> State<'a> {
|
|||
// Once we hit a nonspace character, we are no longer indenting.
|
||||
is_indenting: false,
|
||||
attempting: self.attempting,
|
||||
})
|
||||
}
|
||||
_ => Err((
|
||||
Fail {
|
||||
reason: FailReason::LineTooLong(self.line),
|
||||
attempting: self.attempting,
|
||||
},
|
||||
self.clone(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
/// Advance the parser while also indenting as appropriate.
|
||||
/// This assumes we are only advancing with spaces, since they can indent.
|
||||
pub fn advance_spaces(&self, spaces: usize) -> Self {
|
||||
// We'll cast this to u16 later.
|
||||
debug_assert!(spaces <= std::u16::MAX as usize);
|
||||
|
||||
let column_usize = (self.column as usize)
|
||||
.checked_add(spaces)
|
||||
.unwrap_or_else(panic_max_line_length_exceeded);
|
||||
|
||||
if column_usize > std::u16::MAX as usize {
|
||||
panic_max_line_length_exceeded();
|
||||
}
|
||||
|
||||
pub fn advance_spaces(&self, spaces: usize) -> Result<Self, (Fail, Self)> {
|
||||
match (self.column as usize).checked_add(spaces) {
|
||||
Some(column_usize) if column_usize <= std::u16::MAX as usize => {
|
||||
// Spaces don't affect is_indenting; if we were previously indenting,
|
||||
// we still are, and if we already finished indenting, we're still done.
|
||||
let is_indenting = self.is_indenting;
|
||||
|
@ -110,43 +108,37 @@ impl<'a> State<'a> {
|
|||
let indent_col = if is_indenting {
|
||||
// This doesn't need to be checked_add because it's always true that
|
||||
// indent_col <= col, so if this could possibly overflow, we would
|
||||
// already have panicked from the column calculation.
|
||||
// already have errored out from the column calculation.
|
||||
//
|
||||
// Leaving a debug_assert! in case this invariant someday disappears.
|
||||
// Leaving debug assertions in case this invariant someday disappears.
|
||||
debug_assert!(std::u16::MAX - self.indent_col >= spaces as u16);
|
||||
debug_assert!(spaces <= std::u16::MAX as usize);
|
||||
|
||||
self.indent_col + spaces as u16
|
||||
} else {
|
||||
self.indent_col
|
||||
};
|
||||
|
||||
State {
|
||||
Ok(State {
|
||||
input: &self.input[spaces..],
|
||||
line: self.line,
|
||||
column: column_usize as u16,
|
||||
indent_col,
|
||||
is_indenting,
|
||||
attempting: self.attempting,
|
||||
})
|
||||
}
|
||||
_ => Err((
|
||||
Fail {
|
||||
reason: FailReason::LineTooLong(self.line),
|
||||
attempting: self.attempting,
|
||||
},
|
||||
self.clone(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn panic_max_line_count_exceeded() -> u32 {
|
||||
panic!(
|
||||
"Maximum line count exceeded. Roc only supports compiling files with at most {} lines.",
|
||||
std::u32::MAX
|
||||
)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn panic_max_line_length_exceeded() -> usize {
|
||||
panic!(
|
||||
"Maximum line length exceeded. Roc only supports compiling files whose lines each contain no more than {} characters.",
|
||||
std::u16::MAX
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn state_size() {
|
||||
// State should always be under 8 machine words, so it fits in a typical
|
||||
|
@ -157,12 +149,18 @@ fn state_size() {
|
|||
pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)>;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Fail {
|
||||
Unexpected(char, Region, Attempting),
|
||||
ConditionFailed(Attempting),
|
||||
pub enum FailReason {
|
||||
Unexpected(char, Region),
|
||||
ConditionFailed,
|
||||
LineTooLong(u32 /* which line was too long */),
|
||||
TooManyLines,
|
||||
Eof(Region, Attempting),
|
||||
Eof(Region),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Fail {
|
||||
pub attempting: Attempting,
|
||||
pub reason: FailReason,
|
||||
}
|
||||
|
||||
pub trait Parser<'a, Output> {
|
||||
|
@ -235,8 +233,9 @@ pub fn unexpected_eof<'a>(
|
|||
attempting: Attempting,
|
||||
state: State<'a>,
|
||||
) -> (Fail, State<'a>) {
|
||||
checked_unexpected(chars_consumed, state, |region| {
|
||||
Fail::Eof(region, attempting)
|
||||
checked_unexpected(chars_consumed, state, |region| Fail {
|
||||
reason: FailReason::Eof(region),
|
||||
attempting,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -246,8 +245,9 @@ pub fn unexpected<'a>(
|
|||
state: State<'a>,
|
||||
attempting: Attempting,
|
||||
) -> (Fail, State<'a>) {
|
||||
checked_unexpected(chars_consumed, state, |region| {
|
||||
Fail::Unexpected(ch, region, attempting)
|
||||
checked_unexpected(chars_consumed, state, |region| Fail {
|
||||
reason: FailReason::Unexpected(ch, region),
|
||||
attempting,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -274,7 +274,13 @@ where
|
|||
|
||||
(problem_from_region(region), state)
|
||||
}
|
||||
_ => (Fail::LineTooLong(state.line), state),
|
||||
_ => {
|
||||
let reason = FailReason::LineTooLong(state.line);
|
||||
let attempting = state.attempting;
|
||||
let fail = Fail { reason, attempting };
|
||||
|
||||
(fail, state)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -289,7 +295,7 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
|
|||
let len = string.len();
|
||||
|
||||
match input.get(0..len) {
|
||||
Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len))),
|
||||
Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len)?)),
|
||||
_ => Err(unexpected_eof(len, Attempting::Keyword, state)),
|
||||
}
|
||||
}
|
||||
|
@ -307,7 +313,13 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
Err((Fail::ConditionFailed(state.attempting), state))
|
||||
Err((
|
||||
Fail {
|
||||
reason: FailReason::ConditionFailed,
|
||||
attempting: state.attempting,
|
||||
},
|
||||
state,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -348,12 +360,22 @@ where
|
|||
P1: Parser<'a, A>,
|
||||
P2: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)),
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -363,17 +385,27 @@ where
|
|||
P2: Parser<'a, A>,
|
||||
P3: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p3.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)),
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one_of4<'a, P1, P2, P3, P4, A>(p1: P1, p2: P2, p3: P3, p4: P4) -> impl Parser<'a, A>
|
||||
where
|
||||
|
@ -382,7 +414,10 @@ where
|
|||
P3: Parser<'a, A>,
|
||||
P4: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
|
@ -390,12 +425,19 @@ where
|
|||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p4.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)),
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one_of5<'a, P1, P2, P3, P4, P5, A>(
|
||||
p1: P1,
|
||||
|
@ -411,7 +453,10 @@ where
|
|||
P4: Parser<'a, A>,
|
||||
P5: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
|
@ -421,13 +466,20 @@ where
|
|||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p5.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)),
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one_of6<'a, P1, P2, P3, P4, P5, P6, A>(
|
||||
p1: P1,
|
||||
|
@ -445,7 +497,10 @@ where
|
|||
P5: Parser<'a, A>,
|
||||
P6: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
|
@ -457,14 +512,19 @@ where
|
|||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p6.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => {
|
||||
Err((Fail::ConditionFailed(state.attempting), state))
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -486,7 +546,10 @@ where
|
|||
P6: Parser<'a, A>,
|
||||
P7: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
|
@ -500,15 +563,20 @@ where
|
|||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p7.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => {
|
||||
Err((Fail::ConditionFailed(state.attempting), state))
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -532,7 +600,10 @@ where
|
|||
P7: Parser<'a, A>,
|
||||
P8: Parser<'a, A>,
|
||||
{
|
||||
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_attempting = state.attempting;
|
||||
|
||||
match p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p2.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
|
@ -548,9 +619,13 @@ where
|
|||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => match p8.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
Err((_, state)) => {
|
||||
Err((Fail::ConditionFailed(state.attempting), state))
|
||||
}
|
||||
Err((fail, state)) => Err((
|
||||
Fail {
|
||||
attempting: original_attempting,
|
||||
..fail
|
||||
},
|
||||
state,
|
||||
)),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -560,3 +635,4 @@ where
|
|||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
|||
return Ok((
|
||||
Expr::EmptyStr,
|
||||
// 2 because `""` has length 2
|
||||
state.advance_without_indenting(2),
|
||||
state.advance_without_indenting(2)?,
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -69,31 +69,25 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
|||
Expr::MalformedStr(problems.into_boxed_slice())
|
||||
};
|
||||
|
||||
return Ok((expr, state.advance_without_indenting(len_with_quotes)));
|
||||
let next_state = state.advance_without_indenting(len_with_quotes)?;
|
||||
|
||||
return Ok((expr, next_state));
|
||||
}
|
||||
'\t' => {
|
||||
// TODO report the problem and continue.
|
||||
// Tabs are syntax errors, but maybe the rest of the
|
||||
// string is fine!
|
||||
panic!("TODO string had a tab character in it.");
|
||||
// Report the problem and continue. Tabs are syntax errors,
|
||||
// but maybe the rest of the string is fine!
|
||||
problems.push(loc_char(Problem::Tab, &state, buf.len()));
|
||||
}
|
||||
'\r' => {
|
||||
// TODO report the problem and continue.
|
||||
// Carriage returns aren't allowed in string literals,
|
||||
// but maybe the rest of the string is fine!
|
||||
panic!("TODO string had a tab character in it.");
|
||||
}
|
||||
'\0' => {
|
||||
// TODO report the problem and continue.
|
||||
// Null characters aren't allowed in string literals,
|
||||
// but maybe the rest of the string is fine!
|
||||
panic!("TODO string had a \\0 character in it.");
|
||||
problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
|
||||
}
|
||||
'\n' => {
|
||||
// TODO report the problem and then return Err.
|
||||
// We hit a newline before a close quote.
|
||||
// We can't safely assume where the string was supposed
|
||||
// to end, so this is an unrecoverable error.
|
||||
panic!("TODO string missing closing quote.");
|
||||
return Err(unexpected('\n', 0, state, Attempting::StringLiteral));
|
||||
}
|
||||
normal_char => buf.push(normal_char),
|
||||
}
|
||||
|
@ -108,12 +102,24 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
|||
}
|
||||
}
|
||||
|
||||
fn escaped_char_problem<'a, 'p>(
|
||||
problems: &'p mut Problems,
|
||||
problem: Problem,
|
||||
state: &State<'a>,
|
||||
buf_len: usize,
|
||||
) {
|
||||
fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
|
||||
let start_line = state.line;
|
||||
let start_col = state.column + buf_len as u16;
|
||||
let end_line = start_line;
|
||||
// All invalid chars should have a length of 1
|
||||
let end_col = state.column + 1;
|
||||
|
||||
let region = Region {
|
||||
start_line,
|
||||
start_col,
|
||||
end_line,
|
||||
end_col,
|
||||
};
|
||||
|
||||
Loc { region, value }
|
||||
}
|
||||
|
||||
fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
|
||||
let start_line = state.line;
|
||||
let start_col = state.column + buf_len as u16;
|
||||
let end_line = start_line;
|
||||
|
@ -127,19 +133,15 @@ fn escaped_char_problem<'a, 'p>(
|
|||
end_col,
|
||||
};
|
||||
|
||||
problems.push(Loc {
|
||||
region,
|
||||
value: problem,
|
||||
});
|
||||
Loc { region, value }
|
||||
}
|
||||
|
||||
fn escaped_unicode_problem<'a, 'p>(
|
||||
problems: &'p mut Problems,
|
||||
problem: Problem,
|
||||
fn loc_escaped_unicode<'a, V>(
|
||||
value: V,
|
||||
state: &State<'a>,
|
||||
buf_len: usize,
|
||||
hex_str_len: usize,
|
||||
) {
|
||||
) -> Loc<V> {
|
||||
let start_line = state.line;
|
||||
// +1 due to the `"` which precedes buf.
|
||||
let start_col = state.column + buf_len as u16 + 1;
|
||||
|
@ -155,10 +157,7 @@ fn escaped_unicode_problem<'a, 'p>(
|
|||
end_col,
|
||||
};
|
||||
|
||||
problems.push(Loc {
|
||||
region,
|
||||
value: problem,
|
||||
});
|
||||
Loc { region, value }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
@ -179,29 +178,29 @@ where
|
|||
't' => buf.push('\t'),
|
||||
'n' => buf.push('\n'),
|
||||
'r' => buf.push('\r'),
|
||||
'0' => buf.push('\0'), // We explicitly support null characters, as we
|
||||
// can't be sure we won't receive them from Rust.
|
||||
'u' => handle_escaped_unicode(arena, state, chars, buf, problems)?,
|
||||
'(' => panic!("TODO handle string interpolation"),
|
||||
'\t' => {
|
||||
// Report and continue.
|
||||
// Tabs are syntax errors, but maybe the rest of the string is fine!
|
||||
escaped_char_problem(problems, Problem::Tab, &state, buf.len());
|
||||
problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
|
||||
}
|
||||
'\r' => {
|
||||
// Report and continue.
|
||||
// Carriage returns aren't allowed in string literals,
|
||||
// but maybe the rest of the string is fine!
|
||||
escaped_char_problem(problems, Problem::CarriageReturn, &state, buf.len());
|
||||
}
|
||||
'\0' => {
|
||||
// Report and continue.
|
||||
// Null characters aren't allowed in string literals,
|
||||
// but maybe the rest of the string is fine!
|
||||
escaped_char_problem(problems, Problem::NullChar, &state, buf.len());
|
||||
problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
|
||||
}
|
||||
'\n' => {
|
||||
// Report and bail out.
|
||||
// We can't safely assume where the string was supposed to end.
|
||||
escaped_char_problem(problems, Problem::NewlineInLiteral, &state, buf.len());
|
||||
problems.push(loc_escaped_char(
|
||||
Problem::NewlineInLiteral,
|
||||
&state,
|
||||
buf.len(),
|
||||
));
|
||||
|
||||
return Err(unexpected_eof(
|
||||
buf.len(),
|
||||
|
@ -212,7 +211,11 @@ where
|
|||
_ => {
|
||||
// Report and continue.
|
||||
// An unsupported escaped char (e.g. \q) shouldn't halt parsing.
|
||||
escaped_char_problem(problems, Problem::UnsupportedEscapedChar, &state, buf.len());
|
||||
problems.push(loc_escaped_char(
|
||||
Problem::UnsupportedEscapedChar,
|
||||
&state,
|
||||
buf.len(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -302,13 +305,12 @@ where
|
|||
match char::from_u32(code_pt) {
|
||||
Some(ch) => buf.push(ch),
|
||||
None => {
|
||||
escaped_unicode_problem(
|
||||
problems,
|
||||
problems.push(loc_escaped_unicode(
|
||||
Problem::InvalidUnicodeCodePoint,
|
||||
&state,
|
||||
start_of_unicode,
|
||||
hex_str.len(),
|
||||
);
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -320,13 +322,12 @@ where
|
|||
Problem::NonHexCharsInUnicodeCodePoint
|
||||
};
|
||||
|
||||
escaped_unicode_problem(
|
||||
problems,
|
||||
problems.push(loc_escaped_unicode(
|
||||
problem,
|
||||
&state,
|
||||
start_of_unicode,
|
||||
hex_str.len(),
|
||||
);
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -337,36 +338,33 @@ where
|
|||
'\t' => {
|
||||
// Report and continue.
|
||||
// Tabs are syntax errors, but maybe the rest of the string is fine!
|
||||
escaped_unicode_problem(
|
||||
problems,
|
||||
problems.push(loc_escaped_unicode(
|
||||
Problem::Tab,
|
||||
&state,
|
||||
start_of_unicode,
|
||||
hex_str.len(),
|
||||
);
|
||||
));
|
||||
}
|
||||
'\r' => {
|
||||
// Report and continue.
|
||||
// Carriage returns aren't allowed in string literals,
|
||||
// but maybe the rest of the string is fine!
|
||||
escaped_unicode_problem(
|
||||
problems,
|
||||
problems.push(loc_escaped_unicode(
|
||||
Problem::CarriageReturn,
|
||||
&state,
|
||||
start_of_unicode,
|
||||
hex_str.len(),
|
||||
);
|
||||
));
|
||||
}
|
||||
'\n' => {
|
||||
// Report and bail out.
|
||||
// We can't safely assume where the string was supposed to end.
|
||||
escaped_unicode_problem(
|
||||
problems,
|
||||
problems.push(loc_escaped_unicode(
|
||||
Problem::NewlineInLiteral,
|
||||
&state,
|
||||
start_of_unicode,
|
||||
hex_str.len(),
|
||||
);
|
||||
));
|
||||
|
||||
return Err(unexpected_eof(
|
||||
buf.len(),
|
||||
|
|
|
@ -11,6 +11,17 @@ pub struct Region {
|
|||
pub end_col: u16,
|
||||
}
|
||||
|
||||
impl Region {
|
||||
pub fn zero() -> Self {
|
||||
Region {
|
||||
start_line: 0,
|
||||
end_line: 0,
|
||||
start_col: 0,
|
||||
end_col: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn region_size() {
|
||||
// Region is used all over the place. Avoid increasing its size!
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use std::mem::{self, MaybeUninit};
|
||||
use std::slice;
|
||||
use std::ptr;
|
||||
use std::fmt;
|
||||
use std::str;
|
||||
use std::alloc::{self, Layout};
|
||||
use std::fmt;
|
||||
use std::mem::{self, MaybeUninit};
|
||||
use std::ptr;
|
||||
use std::slice;
|
||||
use std::str;
|
||||
|
||||
/// An immutable string whose maximum length is `isize::MAX`. (For convenience,
|
||||
/// it still returns its length as `usize` since it can't be negative.)
|
||||
|
@ -73,7 +73,8 @@ struct LongStr {
|
|||
// The bit pattern for an empty string. (1 and then all 0s.)
|
||||
// Any other bit pattern means this is not an empty string!
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
const EMPTY_STRING: usize = 0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000;
|
||||
const EMPTY_STRING: usize =
|
||||
0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000;
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
const EMPTY_STRING: usize = 0b1000_0000_0000_0000;
|
||||
|
@ -91,7 +92,7 @@ impl RocStr {
|
|||
length: EMPTY_STRING,
|
||||
// empty strings only ever have length set.
|
||||
bytes: MaybeUninit::uninit(),
|
||||
}
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -176,16 +177,12 @@ impl Clone for LongStr {
|
|||
let new_bytes_ptr = unsafe { alloc::alloc(layout) };
|
||||
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(
|
||||
old_bytes_ptr,
|
||||
new_bytes_ptr,
|
||||
length
|
||||
);
|
||||
ptr::copy_nonoverlapping(old_bytes_ptr, new_bytes_ptr, length);
|
||||
}
|
||||
|
||||
LongStr {
|
||||
bytes: MaybeUninit::new(new_bytes_ptr),
|
||||
length
|
||||
length,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -203,9 +200,7 @@ impl Into<String> for RocStr {
|
|||
let bytes_ptr = unsafe { &self.0.raw } as *const u8;
|
||||
|
||||
// These bytes are already aligned, so we can use them directly.
|
||||
let bytes_slice: &[u8] = unsafe {
|
||||
slice::from_raw_parts(bytes_ptr, length as usize)
|
||||
};
|
||||
let bytes_slice: &[u8] = unsafe { slice::from_raw_parts(bytes_ptr, length as usize) };
|
||||
|
||||
(unsafe { str::from_utf8_unchecked(bytes_slice) }).to_string()
|
||||
} else {
|
||||
|
@ -238,11 +233,7 @@ impl From<String> for RocStr {
|
|||
// Copy the raw bytes from the string into the buffer.
|
||||
unsafe {
|
||||
// Write into the buffer's bytes
|
||||
ptr::copy_nonoverlapping(
|
||||
string.as_ptr(),
|
||||
buffer.as_ptr() as *mut u8,
|
||||
str_len
|
||||
);
|
||||
ptr::copy_nonoverlapping(string.as_ptr(), buffer.as_ptr() as *mut u8, str_len);
|
||||
}
|
||||
|
||||
// Set the last byte in the buffer to be the length (with flag).
|
||||
|
@ -263,13 +254,16 @@ impl From<String> for RocStr {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
impl Clone for RocStr {
|
||||
fn clone(&self) -> Self {
|
||||
let inner = if flagged_as_short_string(self.len_msbyte()) {
|
||||
InnerStr { raw: (unsafe { self.0.raw }).clone() }
|
||||
InnerStr {
|
||||
raw: (unsafe { self.0.raw }).clone(),
|
||||
}
|
||||
} else {
|
||||
InnerStr { long: (unsafe { self.0.long }).clone() }
|
||||
InnerStr {
|
||||
long: (unsafe { self.0.long }).clone(),
|
||||
}
|
||||
};
|
||||
|
||||
RocStr(inner)
|
||||
|
@ -291,7 +285,9 @@ impl Drop for RocStr {
|
|||
|
||||
// We don't need to call drop_in_place. We know bytes_ptr points to
|
||||
// a plain u8 array, so there will for sure be no destructor to run.
|
||||
unsafe { alloc::dealloc(bytes_ptr as *mut u8, layout); }
|
||||
unsafe {
|
||||
alloc::dealloc(bytes_ptr as *mut u8, layout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ pub fn loc_box<T>(val: T) -> Box<Located<T>> {
|
|||
}
|
||||
|
||||
pub fn loc<T>(val: T) -> Located<T> {
|
||||
Located::new(val, empty_region())
|
||||
Located::new(val, Region::zero())
|
||||
}
|
||||
|
||||
pub fn located<T>(
|
||||
|
@ -34,16 +34,6 @@ pub fn located<T>(
|
|||
)
|
||||
}
|
||||
|
||||
pub fn empty_region() -> Region {
|
||||
Region {
|
||||
start_line: 0,
|
||||
start_col: 0,
|
||||
|
||||
end_line: 0,
|
||||
end_col: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn zero_loc<T>(located_val: Located<T>) -> Located<T> {
|
||||
loc(located_val.value)
|
||||
}
|
||||
|
|
|
@ -10,9 +10,7 @@ mod helpers;
|
|||
|
||||
#[cfg(test)]
|
||||
mod test_canonicalize {
|
||||
use helpers::{
|
||||
empty_region, loc, loc_box, mut_map_from_pairs, parse_without_loc, zero_loc_expr,
|
||||
};
|
||||
use helpers::{loc, loc_box, mut_map_from_pairs, parse_without_loc, zero_loc_expr};
|
||||
use roc::canonicalize;
|
||||
use roc::canonicalize::Expr::*;
|
||||
use roc::canonicalize::Pattern::*;
|
||||
|
@ -142,7 +140,7 @@ mod test_canonicalize {
|
|||
Procedure {
|
||||
name: Some("func".to_string()),
|
||||
is_self_tail_recursive: false,
|
||||
definition: empty_region(),
|
||||
definition: Region::zero(),
|
||||
args: vec![loc(Pattern::Identifier(sym("arg")))],
|
||||
body: loc(Expr::Operator(
|
||||
loc_box(Expr::Var(sym("arg"))),
|
||||
|
|
|
@ -15,12 +15,12 @@ mod test_parser {
|
|||
use roc::parse;
|
||||
use roc::parse::ast::Attempting;
|
||||
use roc::parse::ast::Expr::{self, *};
|
||||
use roc::parse::parser::{Parser, State};
|
||||
use roc::parse::parser::{Fail, FailReason, Parser, State};
|
||||
use roc::parse::problems::Problem;
|
||||
use roc::region::Located;
|
||||
use roc::region::{Located, Region};
|
||||
|
||||
fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
|
||||
let state = State::new(&input, Attempting::Expression);
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let arena = Bump::new();
|
||||
let parser = parse::expr();
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
@ -29,6 +29,17 @@ mod test_parser {
|
|||
assert_eq!(Ok(expected_expr), actual);
|
||||
}
|
||||
|
||||
fn assert_parsing_fails<'a>(input: &'a str, reason: FailReason, attempting: Attempting) {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let arena = Bump::new();
|
||||
let parser = parse::expr();
|
||||
let answer = parser.parse(&arena, state);
|
||||
let actual = answer.map_err(|(fail, _)| fail);
|
||||
let expected_fail = Fail { reason, attempting };
|
||||
|
||||
assert_eq!(Err(expected_fail), actual);
|
||||
}
|
||||
|
||||
fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec<Located<Problem>>) {
|
||||
let state = State::new(&input, Attempting::Expression);
|
||||
let arena = Bump::new();
|
||||
|
@ -244,6 +255,32 @@ mod test_parser {
|
|||
// TODO verify that exceeding maximum line length does NOT panic
|
||||
// TODO verify that exceeding maximum line count does NOT panic
|
||||
|
||||
#[test]
|
||||
fn empty_source_file() {
|
||||
assert_parsing_fails("", FailReason::Eof(Region::zero()), Attempting::Expression);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn first_line_too_long() {
|
||||
let max_line_length = std::u16::MAX as usize;
|
||||
|
||||
// the string literal "ZZZZZZZZZ" but with way more Zs
|
||||
let too_long_str_body: String = (1..max_line_length)
|
||||
.into_iter()
|
||||
.map(|_| "Z".to_string())
|
||||
.collect();
|
||||
let too_long_str = format!("\"{}\"", too_long_str_body);
|
||||
|
||||
// Make sure it's longer than our maximum line length
|
||||
assert_eq!(too_long_str.len(), max_line_length + 1);
|
||||
|
||||
assert_parsing_fails(
|
||||
&too_long_str,
|
||||
FailReason::LineTooLong(0),
|
||||
Attempting::Expression,
|
||||
);
|
||||
}
|
||||
|
||||
// NUMBER LITERALS
|
||||
|
||||
#[test]
|
||||
|
@ -259,4 +296,55 @@ mod test_parser {
|
|||
assert_parses_to("-42", Int(-42));
|
||||
assert_parses_to(&std::i64::MIN.to_string(), Int(std::i64::MIN));
|
||||
}
|
||||
|
||||
// fn expect_parsed_float<'a>(expected: f64, actual: &str) {
|
||||
// assert_eq!(
|
||||
// Ok((Float(expected), "".to_string())),
|
||||
// parse_without_loc(actual)
|
||||
// );
|
||||
// }
|
||||
|
||||
// fn expect_parsed_int<'a>(expected: i64, actual: &str) {
|
||||
// assert_eq!(
|
||||
// Ok((Int(expected), "".to_string())),
|
||||
// parse_without_loc(actual)
|
||||
// );
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn positive_int() {
|
||||
// expect_parsed_int(1234, "1234");
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn negative_int() {
|
||||
// expect_parsed_int(-1234, "-1234");
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn positive_float() {
|
||||
// expect_parsed_float(123.45, "123.45");
|
||||
// expect_parsed_float(42.00, "42.00");
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn negative_float() {
|
||||
// expect_parsed_float(-1234.567, "-1234.567");
|
||||
// expect_parsed_float(-192.0, "-192.0");
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn ints_with_underscores() {
|
||||
// expect_parsed_int(987654321, "987_6_5_432_1");
|
||||
// expect_parsed_int(-1234567890, "-1_234_567_890");
|
||||
// }
|
||||
|
||||
// #[test]
|
||||
// fn fracs_with_spaces() {
|
||||
// expect_parsed_float(-1234.567, "-1_23_4.567");
|
||||
// expect_parsed_float(-192.0, "-19_2.0");
|
||||
// expect_parsed_float(123.45, "1_2_3.45");
|
||||
// expect_parsed_float(42.00, "4_2.00");
|
||||
// }
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue