Fix some line length and attempting issues.

This commit is contained in:
Richard Feldman 2019-09-07 09:43:19 -04:00
parent 2ad510ba77
commit dfd1d4bbb4
10 changed files with 430 additions and 271 deletions

View file

@ -114,4 +114,5 @@ pub enum Attempting {
NumberLiteral, NumberLiteral,
UnicodeEscape, UnicodeEscape,
Expression, Expression,
Module,
} }

View file

@ -4,13 +4,14 @@ pub mod parser;
pub mod problems; pub mod problems;
pub mod string_literal; pub mod string_literal;
use parse::ast::Expr; use parse::ast::{Attempting, Expr};
use parse::number_literal::number_literal; use parse::number_literal::number_literal;
use parse::parser::Parser; use parse::parser::{attempt, one_of2, Parser};
use parse::string_literal::string_literal; use parse::string_literal::string_literal;
pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> { pub fn expr<'a>() -> impl Parser<'a, Expr<'a>> {
parser::one_of2(string_literal(), number_literal()) attempt(
Attempting::Expression,
one_of2(number_literal(), string_literal()),
)
} }
/// The literal source text of the `if` keyword.
///
/// `const` items already have the `'static` lifetime, so it is not spelled out.
const KW_IF: &str = "if";

View file

@ -121,7 +121,7 @@ where
}; };
let total_chars_parsed = before_decimal.len() + chars_skipped; let total_chars_parsed = before_decimal.len() + chars_skipped;
let state = state.advance_without_indenting(total_chars_parsed); let state = state.advance_without_indenting(total_chars_parsed)?;
Ok((expr, state)) Ok((expr, state))
} }

View file

@ -49,19 +49,23 @@ impl<'a> State<'a> {
/// Increments the line, then resets column, indent_col, and is_indenting. /// Increments the line, then resets column, indent_col, and is_indenting.
/// This does *not* advance the input. /// This does *not* advance the input.
pub fn newline(&self) -> Self { pub fn newline(&self) -> Result<Self, (Fail, Self)> {
let line = self match self.line.checked_add(1) {
.line Some(line) => Ok(State {
.checked_add(1)
.unwrap_or_else(panic_max_line_count_exceeded);
State {
input: self.input, input: self.input,
line, line,
column: 0, column: 0,
indent_col: 1, indent_col: 1,
is_indenting: true, is_indenting: true,
attempting: self.attempting, attempting: self.attempting,
}),
None => Err((
Fail {
reason: FailReason::TooManyLines,
attempting: self.attempting,
},
self.clone(),
)),
} }
} }
@ -69,16 +73,10 @@ impl<'a> State<'a> {
/// This assumes we are *not* advancing with spaces, or at least that /// This assumes we are *not* advancing with spaces, or at least that
/// any spaces on the line were preceded by non-spaces - which would mean /// any spaces on the line were preceded by non-spaces - which would mean
/// they weren't eligible to indent anyway. /// they weren't eligible to indent anyway.
pub fn advance_without_indenting(&self, quantity: usize) -> Self { pub fn advance_without_indenting(&self, quantity: usize) -> Result<Self, (Fail, Self)> {
let column_usize = (self.column as usize) match (self.column as usize).checked_add(quantity) {
.checked_add(quantity) Some(column_usize) if column_usize <= std::u16::MAX as usize => {
.unwrap_or_else(panic_max_line_length_exceeded); Ok(State {
if column_usize > std::u16::MAX as usize {
panic_max_line_length_exceeded();
}
State {
input: &self.input[quantity..], input: &self.input[quantity..],
line: self.line, line: self.line,
column: column_usize as u16, column: column_usize as u16,
@ -86,22 +84,22 @@ impl<'a> State<'a> {
// Once we hit a nonspace character, we are no longer indenting. // Once we hit a nonspace character, we are no longer indenting.
is_indenting: false, is_indenting: false,
attempting: self.attempting, attempting: self.attempting,
})
}
_ => Err((
Fail {
reason: FailReason::LineTooLong(self.line),
attempting: self.attempting,
},
self.clone(),
)),
} }
} }
/// Advance the parser while also indenting as appropriate. /// Advance the parser while also indenting as appropriate.
/// This assumes we are only advancing with spaces, since they can indent. /// This assumes we are only advancing with spaces, since they can indent.
pub fn advance_spaces(&self, spaces: usize) -> Self { pub fn advance_spaces(&self, spaces: usize) -> Result<Self, (Fail, Self)> {
// We'll cast this to u16 later. match (self.column as usize).checked_add(spaces) {
debug_assert!(spaces <= std::u16::MAX as usize); Some(column_usize) if column_usize <= std::u16::MAX as usize => {
let column_usize = (self.column as usize)
.checked_add(spaces)
.unwrap_or_else(panic_max_line_length_exceeded);
if column_usize > std::u16::MAX as usize {
panic_max_line_length_exceeded();
}
// Spaces don't affect is_indenting; if we were previously indenting, // Spaces don't affect is_indenting; if we were previously indenting,
// we still are, and if we already finished indenting, we're still done. // we still are, and if we already finished indenting, we're still done.
let is_indenting = self.is_indenting; let is_indenting = self.is_indenting;
@ -110,43 +108,37 @@ impl<'a> State<'a> {
let indent_col = if is_indenting { let indent_col = if is_indenting {
// This doesn't need to be checked_add because it's always true that // This doesn't need to be checked_add because it's always true that
// indent_col <= col, so if this could possibly overflow, we would // indent_col <= col, so if this could possibly overflow, we would
// already have panicked from the column calculation. // already have errored out from the column calculation.
// //
// Leaving a debug_assert! in case this invariant someday disappears. // Leaving debug assertions in case this invariant someday disappears.
debug_assert!(std::u16::MAX - self.indent_col >= spaces as u16); debug_assert!(std::u16::MAX - self.indent_col >= spaces as u16);
debug_assert!(spaces <= std::u16::MAX as usize);
self.indent_col + spaces as u16 self.indent_col + spaces as u16
} else { } else {
self.indent_col self.indent_col
}; };
State { Ok(State {
input: &self.input[spaces..], input: &self.input[spaces..],
line: self.line, line: self.line,
column: column_usize as u16, column: column_usize as u16,
indent_col, indent_col,
is_indenting, is_indenting,
attempting: self.attempting, attempting: self.attempting,
})
}
_ => Err((
Fail {
reason: FailReason::LineTooLong(self.line),
attempting: self.attempting,
},
self.clone(),
)),
} }
} }
} }
/// Panics with a message stating the maximum supported line count.
///
/// This function never actually returns. Its `u32` return type lets it be
/// passed directly as the fallback closure of `unwrap_or_else` on a
/// `checked_add` over the `u32` line counter.
#[inline(never)]
fn panic_max_line_count_exceeded() -> u32 {
    // The line counter is a u32, so this is the largest representable count.
    let max_lines = std::u32::MAX;

    panic!(
        "Maximum line count exceeded. Roc only supports compiling files with at most {} lines.",
        max_lines
    )
}
/// Panics with a message stating the maximum supported line length.
///
/// This function never actually returns. Its `usize` return type lets it be
/// passed directly as the fallback closure of `unwrap_or_else` on a
/// `checked_add` over a `usize` column value.
#[inline(never)]
fn panic_max_line_length_exceeded() -> usize {
    // Columns are stored as u16, so this is the longest representable line.
    let max_columns = std::u16::MAX;

    panic!(
        "Maximum line length exceeded. Roc only supports compiling files whose lines each contain no more than {} characters.",
        max_columns
    )
}
#[test] #[test]
fn state_size() { fn state_size() {
// State should always be under 8 machine words, so it fits in a typical // State should always be under 8 machine words, so it fits in a typical
@ -157,12 +149,18 @@ fn state_size() {
pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)>; pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)>;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Fail { pub enum FailReason {
Unexpected(char, Region, Attempting), Unexpected(char, Region),
ConditionFailed(Attempting), ConditionFailed,
LineTooLong(u32 /* which line was too long */), LineTooLong(u32 /* which line was too long */),
TooManyLines, TooManyLines,
Eof(Region, Attempting), Eof(Region),
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Fail {
pub attempting: Attempting,
pub reason: FailReason,
} }
pub trait Parser<'a, Output> { pub trait Parser<'a, Output> {
@ -235,8 +233,9 @@ pub fn unexpected_eof<'a>(
attempting: Attempting, attempting: Attempting,
state: State<'a>, state: State<'a>,
) -> (Fail, State<'a>) { ) -> (Fail, State<'a>) {
checked_unexpected(chars_consumed, state, |region| { checked_unexpected(chars_consumed, state, |region| Fail {
Fail::Eof(region, attempting) reason: FailReason::Eof(region),
attempting,
}) })
} }
@ -246,8 +245,9 @@ pub fn unexpected<'a>(
state: State<'a>, state: State<'a>,
attempting: Attempting, attempting: Attempting,
) -> (Fail, State<'a>) { ) -> (Fail, State<'a>) {
checked_unexpected(chars_consumed, state, |region| { checked_unexpected(chars_consumed, state, |region| Fail {
Fail::Unexpected(ch, region, attempting) reason: FailReason::Unexpected(ch, region),
attempting,
}) })
} }
@ -274,7 +274,13 @@ where
(problem_from_region(region), state) (problem_from_region(region), state)
} }
_ => (Fail::LineTooLong(state.line), state), _ => {
let reason = FailReason::LineTooLong(state.line);
let attempting = state.attempting;
let fail = Fail { reason, attempting };
(fail, state)
}
} }
} }
@ -289,7 +295,7 @@ pub fn string<'a>(string: &'static str) -> impl Parser<'a, ()> {
let len = string.len(); let len = string.len();
match input.get(0..len) { match input.get(0..len) {
Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len))), Some(next_str) if next_str == string => Ok(((), state.advance_without_indenting(len)?)),
_ => Err(unexpected_eof(len, Attempting::Keyword, state)), _ => Err(unexpected_eof(len, Attempting::Keyword, state)),
} }
} }
@ -307,7 +313,13 @@ where
} }
} }
Err((Fail::ConditionFailed(state.attempting), state)) Err((
Fail {
reason: FailReason::ConditionFailed,
attempting: state.attempting,
},
state,
))
} }
} }
@ -348,12 +360,22 @@ where
P1: Parser<'a, A>, P1: Parser<'a, A>,
P2: Parser<'a, A>, P2: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), Err((fail, state)) => Err((
Fail {
attempting: original_attempting,
..fail
}, },
state,
)),
},
}
} }
} }
@ -363,17 +385,27 @@ where
P2: Parser<'a, A>, P2: Parser<'a, A>,
P3: Parser<'a, A>, P3: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p3.parse(arena, state) { Err((_, state)) => match p3.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), Err((fail, state)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
)),
}, },
}, },
} }
} }
}
pub fn one_of4<'a, P1, P2, P3, P4, A>(p1: P1, p2: P2, p3: P3, p4: P4) -> impl Parser<'a, A> pub fn one_of4<'a, P1, P2, P3, P4, A>(p1: P1, p2: P2, p3: P3, p4: P4) -> impl Parser<'a, A>
where where
@ -382,7 +414,10 @@ where
P3: Parser<'a, A>, P3: Parser<'a, A>,
P4: Parser<'a, A>, P4: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
@ -390,12 +425,19 @@ where
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p4.parse(arena, state) { Err((_, state)) => match p4.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), Err((fail, state)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
)),
}, },
}, },
}, },
} }
} }
}
pub fn one_of5<'a, P1, P2, P3, P4, P5, A>( pub fn one_of5<'a, P1, P2, P3, P4, P5, A>(
p1: P1, p1: P1,
@ -411,7 +453,10 @@ where
P4: Parser<'a, A>, P4: Parser<'a, A>,
P5: Parser<'a, A>, P5: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
@ -421,13 +466,20 @@ where
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p5.parse(arena, state) { Err((_, state)) => match p5.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => Err((Fail::ConditionFailed(state.attempting), state)), Err((fail, state)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
)),
}, },
}, },
}, },
}, },
} }
} }
}
pub fn one_of6<'a, P1, P2, P3, P4, P5, P6, A>( pub fn one_of6<'a, P1, P2, P3, P4, P5, P6, A>(
p1: P1, p1: P1,
@ -445,7 +497,10 @@ where
P5: Parser<'a, A>, P5: Parser<'a, A>,
P6: Parser<'a, A>, P6: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
@ -457,14 +512,19 @@ where
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p6.parse(arena, state) { Err((_, state)) => match p6.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => { Err((fail, state)) => Err((
Err((Fail::ConditionFailed(state.attempting), state)) Fail {
attempting: original_attempting,
..fail
},
state,
)),
},
},
},
},
},
} }
},
},
},
},
},
} }
} }
@ -486,7 +546,10 @@ where
P6: Parser<'a, A>, P6: Parser<'a, A>,
P7: Parser<'a, A>, P7: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
@ -500,15 +563,20 @@ where
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p7.parse(arena, state) { Err((_, state)) => match p7.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => { Err((fail, state)) => Err((
Err((Fail::ConditionFailed(state.attempting), state)) Fail {
attempting: original_attempting,
..fail
},
state,
)),
},
},
},
},
},
},
} }
},
},
},
},
},
},
} }
} }
@ -532,7 +600,10 @@ where
P7: Parser<'a, A>, P7: Parser<'a, A>,
P8: Parser<'a, A>, P8: Parser<'a, A>,
{ {
move |arena: &'a Bump, state: State<'a>| match p1.parse(arena, state) { move |arena: &'a Bump, state: State<'a>| {
let original_attempting = state.attempting;
match p1.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p2.parse(arena, state) { Err((_, state)) => match p2.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
@ -548,9 +619,13 @@ where
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => match p8.parse(arena, state) { Err((_, state)) => match p8.parse(arena, state) {
valid @ Ok(_) => valid, valid @ Ok(_) => valid,
Err((_, state)) => { Err((fail, state)) => Err((
Err((Fail::ConditionFailed(state.attempting), state)) Fail {
} attempting: original_attempting,
..fail
},
state,
)),
}, },
}, },
}, },
@ -560,3 +635,4 @@ where
}, },
} }
} }
}

View file

@ -30,7 +30,7 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
return Ok(( return Ok((
Expr::EmptyStr, Expr::EmptyStr,
// 2 because `""` has length 2 // 2 because `""` has length 2
state.advance_without_indenting(2), state.advance_without_indenting(2)?,
)); ));
} }
@ -69,31 +69,25 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
Expr::MalformedStr(problems.into_boxed_slice()) Expr::MalformedStr(problems.into_boxed_slice())
}; };
return Ok((expr, state.advance_without_indenting(len_with_quotes))); let next_state = state.advance_without_indenting(len_with_quotes)?;
return Ok((expr, next_state));
} }
'\t' => { '\t' => {
// TODO report the problem and continue. // Report the problem and continue. Tabs are syntax errors,
// Tabs are syntax errors, but maybe the rest of the // but maybe the rest of the string is fine!
// string is fine! problems.push(loc_char(Problem::Tab, &state, buf.len()));
panic!("TODO string had a tab character in it.");
} }
'\r' => { '\r' => {
// TODO report the problem and continue.
// Carriage returns aren't allowed in string literals, // Carriage returns aren't allowed in string literals,
// but maybe the rest of the string is fine! // but maybe the rest of the string is fine!
panic!("TODO string had a tab character in it."); problems.push(loc_char(Problem::CarriageReturn, &state, buf.len()));
}
'\0' => {
// TODO report the problem and continue.
// Null characters aren't allowed in string literals,
// but maybe the rest of the string is fine!
panic!("TODO string had a \\0 character in it.");
} }
'\n' => { '\n' => {
// TODO report the problem and then return Err. // We hit a newline before a close quote.
// We can't safely assume where the string was supposed // We can't safely assume where the string was supposed
// to end, so this is an unrecoverable error. // to end, so this is an unrecoverable error.
panic!("TODO string missing closing quote."); return Err(unexpected('\n', 0, state, Attempting::StringLiteral));
} }
normal_char => buf.push(normal_char), normal_char => buf.push(normal_char),
} }
@ -108,12 +102,24 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
} }
} }
fn escaped_char_problem<'a, 'p>( fn loc_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
problems: &'p mut Problems, let start_line = state.line;
problem: Problem, let start_col = state.column + buf_len as u16;
state: &State<'a>, let end_line = start_line;
buf_len: usize, // All invalid chars should have a length of 1
) { let end_col = state.column + 1;
let region = Region {
start_line,
start_col,
end_line,
end_col,
};
Loc { region, value }
}
fn loc_escaped_char<'a, V>(value: V, state: &State<'a>, buf_len: usize) -> Loc<V> {
let start_line = state.line; let start_line = state.line;
let start_col = state.column + buf_len as u16; let start_col = state.column + buf_len as u16;
let end_line = start_line; let end_line = start_line;
@ -127,19 +133,15 @@ fn escaped_char_problem<'a, 'p>(
end_col, end_col,
}; };
problems.push(Loc { Loc { region, value }
region,
value: problem,
});
} }
fn escaped_unicode_problem<'a, 'p>( fn loc_escaped_unicode<'a, V>(
problems: &'p mut Problems, value: V,
problem: Problem,
state: &State<'a>, state: &State<'a>,
buf_len: usize, buf_len: usize,
hex_str_len: usize, hex_str_len: usize,
) { ) -> Loc<V> {
let start_line = state.line; let start_line = state.line;
// +1 due to the `"` which precedes buf. // +1 due to the `"` which precedes buf.
let start_col = state.column + buf_len as u16 + 1; let start_col = state.column + buf_len as u16 + 1;
@ -155,10 +157,7 @@ fn escaped_unicode_problem<'a, 'p>(
end_col, end_col,
}; };
problems.push(Loc { Loc { region, value }
region,
value: problem,
});
} }
#[inline(always)] #[inline(always)]
@ -179,29 +178,29 @@ where
't' => buf.push('\t'), 't' => buf.push('\t'),
'n' => buf.push('\n'), 'n' => buf.push('\n'),
'r' => buf.push('\r'), 'r' => buf.push('\r'),
'0' => buf.push('\0'), // We explicitly support null characters, as we
// can't be sure we won't receive them from Rust.
'u' => handle_escaped_unicode(arena, state, chars, buf, problems)?, 'u' => handle_escaped_unicode(arena, state, chars, buf, problems)?,
'(' => panic!("TODO handle string interpolation"), '(' => panic!("TODO handle string interpolation"),
'\t' => { '\t' => {
// Report and continue. // Report and continue.
// Tabs are syntax errors, but maybe the rest of the string is fine! // Tabs are syntax errors, but maybe the rest of the string is fine!
escaped_char_problem(problems, Problem::Tab, &state, buf.len()); problems.push(loc_escaped_char(Problem::Tab, &state, buf.len()));
} }
'\r' => { '\r' => {
// Report and continue. // Report and continue.
// Carriage returns aren't allowed in string literals, // Carriage returns aren't allowed in string literals,
// but maybe the rest of the string is fine! // but maybe the rest of the string is fine!
escaped_char_problem(problems, Problem::CarriageReturn, &state, buf.len()); problems.push(loc_escaped_char(Problem::CarriageReturn, &state, buf.len()));
}
'\0' => {
// Report and continue.
// Null characters aren't allowed in string literals,
// but maybe the rest of the string is fine!
escaped_char_problem(problems, Problem::NullChar, &state, buf.len());
} }
'\n' => { '\n' => {
// Report and bail out. // Report and bail out.
// We can't safely assume where the string was supposed to end. // We can't safely assume where the string was supposed to end.
escaped_char_problem(problems, Problem::NewlineInLiteral, &state, buf.len()); problems.push(loc_escaped_char(
Problem::NewlineInLiteral,
&state,
buf.len(),
));
return Err(unexpected_eof( return Err(unexpected_eof(
buf.len(), buf.len(),
@ -212,7 +211,11 @@ where
_ => { _ => {
// Report and continue. // Report and continue.
// An unsupported escaped char (e.g. \q) shouldn't halt parsing. // An unsupported escaped char (e.g. \q) shouldn't halt parsing.
escaped_char_problem(problems, Problem::UnsupportedEscapedChar, &state, buf.len()); problems.push(loc_escaped_char(
Problem::UnsupportedEscapedChar,
&state,
buf.len(),
));
} }
} }
@ -302,13 +305,12 @@ where
match char::from_u32(code_pt) { match char::from_u32(code_pt) {
Some(ch) => buf.push(ch), Some(ch) => buf.push(ch),
None => { None => {
escaped_unicode_problem( problems.push(loc_escaped_unicode(
problems,
Problem::InvalidUnicodeCodePoint, Problem::InvalidUnicodeCodePoint,
&state, &state,
start_of_unicode, start_of_unicode,
hex_str.len(), hex_str.len(),
); ));
} }
} }
} }
@ -320,13 +322,12 @@ where
Problem::NonHexCharsInUnicodeCodePoint Problem::NonHexCharsInUnicodeCodePoint
}; };
escaped_unicode_problem( problems.push(loc_escaped_unicode(
problems,
problem, problem,
&state, &state,
start_of_unicode, start_of_unicode,
hex_str.len(), hex_str.len(),
); ));
} }
} }
@ -337,36 +338,33 @@ where
'\t' => { '\t' => {
// Report and continue. // Report and continue.
// Tabs are syntax errors, but maybe the rest of the string is fine! // Tabs are syntax errors, but maybe the rest of the string is fine!
escaped_unicode_problem( problems.push(loc_escaped_unicode(
problems,
Problem::Tab, Problem::Tab,
&state, &state,
start_of_unicode, start_of_unicode,
hex_str.len(), hex_str.len(),
); ));
} }
'\r' => { '\r' => {
// Report and continue. // Report and continue.
// Carriage returns aren't allowed in string literals, // Carriage returns aren't allowed in string literals,
// but maybe the rest of the string is fine! // but maybe the rest of the string is fine!
escaped_unicode_problem( problems.push(loc_escaped_unicode(
problems,
Problem::CarriageReturn, Problem::CarriageReturn,
&state, &state,
start_of_unicode, start_of_unicode,
hex_str.len(), hex_str.len(),
); ));
} }
'\n' => { '\n' => {
// Report and bail out. // Report and bail out.
// We can't safely assume where the string was supposed to end. // We can't safely assume where the string was supposed to end.
escaped_unicode_problem( problems.push(loc_escaped_unicode(
problems,
Problem::NewlineInLiteral, Problem::NewlineInLiteral,
&state, &state,
start_of_unicode, start_of_unicode,
hex_str.len(), hex_str.len(),
); ));
return Err(unexpected_eof( return Err(unexpected_eof(
buf.len(), buf.len(),

View file

@ -11,6 +11,17 @@ pub struct Region {
pub end_col: u16, pub end_col: u16,
} }
impl Region {
pub fn zero() -> Self {
Region {
start_line: 0,
end_line: 0,
start_col: 0,
end_col: 0,
}
}
}
#[test] #[test]
fn region_size() { fn region_size() {
// Region is used all over the place. Avoid increasing its size! // Region is used all over the place. Avoid increasing its size!

View file

@ -1,9 +1,9 @@
use std::mem::{self, MaybeUninit};
use std::slice;
use std::ptr;
use std::fmt;
use std::str;
use std::alloc::{self, Layout}; use std::alloc::{self, Layout};
use std::fmt;
use std::mem::{self, MaybeUninit};
use std::ptr;
use std::slice;
use std::str;
/// An immutable string whose maximum length is `isize::MAX`. (For convenience, /// An immutable string whose maximum length is `isize::MAX`. (For convenience,
/// it still returns its length as `usize` since it can't be negative.) /// it still returns its length as `usize` since it can't be negative.)
@ -73,7 +73,8 @@ struct LongStr {
// The bit pattern for an empty string. (1 and then all 0s.) // The bit pattern for an empty string. (1 and then all 0s.)
// Any other bit pattern means this is not an empty string! // Any other bit pattern means this is not an empty string!
#[cfg(target_pointer_width = "64")] #[cfg(target_pointer_width = "64")]
const EMPTY_STRING: usize = 0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000; const EMPTY_STRING: usize =
0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000;
#[cfg(target_pointer_width = "32")] #[cfg(target_pointer_width = "32")]
const EMPTY_STRING: usize = 0b1000_0000_0000_0000; const EMPTY_STRING: usize = 0b1000_0000_0000_0000;
@ -91,7 +92,7 @@ impl RocStr {
length: EMPTY_STRING, length: EMPTY_STRING,
// empty strings only ever have length set. // empty strings only ever have length set.
bytes: MaybeUninit::uninit(), bytes: MaybeUninit::uninit(),
} },
}) })
} }
@ -176,16 +177,12 @@ impl Clone for LongStr {
let new_bytes_ptr = unsafe { alloc::alloc(layout) }; let new_bytes_ptr = unsafe { alloc::alloc(layout) };
unsafe { unsafe {
ptr::copy_nonoverlapping( ptr::copy_nonoverlapping(old_bytes_ptr, new_bytes_ptr, length);
old_bytes_ptr,
new_bytes_ptr,
length
);
} }
LongStr { LongStr {
bytes: MaybeUninit::new(new_bytes_ptr), bytes: MaybeUninit::new(new_bytes_ptr),
length length,
} }
} }
} }
@ -203,9 +200,7 @@ impl Into<String> for RocStr {
let bytes_ptr = unsafe { &self.0.raw } as *const u8; let bytes_ptr = unsafe { &self.0.raw } as *const u8;
// These bytes are already aligned, so we can use them directly. // These bytes are already aligned, so we can use them directly.
let bytes_slice: &[u8] = unsafe { let bytes_slice: &[u8] = unsafe { slice::from_raw_parts(bytes_ptr, length as usize) };
slice::from_raw_parts(bytes_ptr, length as usize)
};
(unsafe { str::from_utf8_unchecked(bytes_slice) }).to_string() (unsafe { str::from_utf8_unchecked(bytes_slice) }).to_string()
} else { } else {
@ -238,11 +233,7 @@ impl From<String> for RocStr {
// Copy the raw bytes from the string into the buffer. // Copy the raw bytes from the string into the buffer.
unsafe { unsafe {
// Write into the buffer's bytes // Write into the buffer's bytes
ptr::copy_nonoverlapping( ptr::copy_nonoverlapping(string.as_ptr(), buffer.as_ptr() as *mut u8, str_len);
string.as_ptr(),
buffer.as_ptr() as *mut u8,
str_len
);
} }
// Set the last byte in the buffer to be the length (with flag). // Set the last byte in the buffer to be the length (with flag).
@ -263,13 +254,16 @@ impl From<String> for RocStr {
} }
} }
impl Clone for RocStr { impl Clone for RocStr {
fn clone(&self) -> Self { fn clone(&self) -> Self {
let inner = if flagged_as_short_string(self.len_msbyte()) { let inner = if flagged_as_short_string(self.len_msbyte()) {
InnerStr { raw: (unsafe { self.0.raw }).clone() } InnerStr {
raw: (unsafe { self.0.raw }).clone(),
}
} else { } else {
InnerStr { long: (unsafe { self.0.long }).clone() } InnerStr {
long: (unsafe { self.0.long }).clone(),
}
}; };
RocStr(inner) RocStr(inner)
@ -291,7 +285,9 @@ impl Drop for RocStr {
// We don't need to call drop_in_place. We know bytes_ptr points to // We don't need to call drop_in_place. We know bytes_ptr points to
// a plain u8 array, so there will for sure be no destructor to run. // a plain u8 array, so there will for sure be no destructor to run.
unsafe { alloc::dealloc(bytes_ptr as *mut u8, layout); } unsafe {
alloc::dealloc(bytes_ptr as *mut u8, layout);
}
} }
} }
} }

View file

@ -13,7 +13,7 @@ pub fn loc_box<T>(val: T) -> Box<Located<T>> {
} }
pub fn loc<T>(val: T) -> Located<T> { pub fn loc<T>(val: T) -> Located<T> {
Located::new(val, empty_region()) Located::new(val, Region::zero())
} }
pub fn located<T>( pub fn located<T>(
@ -34,16 +34,6 @@ pub fn located<T>(
) )
} }
/// Returns a `Region` with every line and column field set to 0.
pub fn empty_region() -> Region {
    let (start_line, start_col) = (0, 0);
    let (end_line, end_col) = (0, 0);

    Region {
        start_line,
        start_col,
        end_line,
        end_col,
    }
}
pub fn zero_loc<T>(located_val: Located<T>) -> Located<T> { pub fn zero_loc<T>(located_val: Located<T>) -> Located<T> {
loc(located_val.value) loc(located_val.value)
} }

View file

@ -10,9 +10,7 @@ mod helpers;
#[cfg(test)] #[cfg(test)]
mod test_canonicalize { mod test_canonicalize {
use helpers::{ use helpers::{loc, loc_box, mut_map_from_pairs, parse_without_loc, zero_loc_expr};
empty_region, loc, loc_box, mut_map_from_pairs, parse_without_loc, zero_loc_expr,
};
use roc::canonicalize; use roc::canonicalize;
use roc::canonicalize::Expr::*; use roc::canonicalize::Expr::*;
use roc::canonicalize::Pattern::*; use roc::canonicalize::Pattern::*;
@ -142,7 +140,7 @@ mod test_canonicalize {
Procedure { Procedure {
name: Some("func".to_string()), name: Some("func".to_string()),
is_self_tail_recursive: false, is_self_tail_recursive: false,
definition: empty_region(), definition: Region::zero(),
args: vec![loc(Pattern::Identifier(sym("arg")))], args: vec![loc(Pattern::Identifier(sym("arg")))],
body: loc(Expr::Operator( body: loc(Expr::Operator(
loc_box(Expr::Var(sym("arg"))), loc_box(Expr::Var(sym("arg"))),

View file

@ -15,12 +15,12 @@ mod test_parser {
use roc::parse; use roc::parse;
use roc::parse::ast::Attempting; use roc::parse::ast::Attempting;
use roc::parse::ast::Expr::{self, *}; use roc::parse::ast::Expr::{self, *};
use roc::parse::parser::{Parser, State}; use roc::parse::parser::{Fail, FailReason, Parser, State};
use roc::parse::problems::Problem; use roc::parse::problems::Problem;
use roc::region::Located; use roc::region::{Located, Region};
fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) { fn assert_parses_to<'a>(input: &'a str, expected_expr: Expr<'a>) {
let state = State::new(&input, Attempting::Expression); let state = State::new(&input, Attempting::Module);
let arena = Bump::new(); let arena = Bump::new();
let parser = parse::expr(); let parser = parse::expr();
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);
@ -29,6 +29,17 @@ mod test_parser {
assert_eq!(Ok(expected_expr), actual); assert_eq!(Ok(expected_expr), actual);
} }
/// Runs the expression parser over `input` and asserts that it fails with
/// exactly `Fail { reason, attempting }`.
///
/// Parsing starts from a fresh `State` that is attempting `Module`. The
/// state returned alongside the failure is discarded; only the `Fail` value
/// is compared.
fn assert_parsing_fails<'a>(input: &'a str, reason: FailReason, attempting: Attempting) {
    let arena = Bump::new();
    let parser = parse::expr();
    let initial_state = State::new(&input, Attempting::Module);

    // Keep only the failure; drop the state that accompanies it.
    let outcome = parser
        .parse(&arena, initial_state)
        .map_err(|(fail, _)| fail);

    assert_eq!(Err(Fail { reason, attempting }), outcome);
}
fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec<Located<Problem>>) { fn assert_malformed_str<'a>(input: &'a str, expected_probs: Vec<Located<Problem>>) {
let state = State::new(&input, Attempting::Expression); let state = State::new(&input, Attempting::Expression);
let arena = Bump::new(); let arena = Bump::new();
@ -244,6 +255,32 @@ mod test_parser {
// TODO verify that exceeding maximum line length does NOT panic // TODO verify that exceeding maximum line length does NOT panic
// TODO verify that exceeding maximum line count does NOT panic // TODO verify that exceeding maximum line count does NOT panic
#[test]
fn empty_source_file() {
    // With no input at all, parsing should fail with an EOF at the zero
    // region, reported as attempting an expression.
    let expected_reason = FailReason::Eof(Region::zero());

    assert_parsing_fails("", expected_reason, Attempting::Expression);
}
#[test]
fn first_line_too_long() {
    let max_line_length = std::u16::MAX as usize;

    // The string literal "ZZZZZZZZZ" but with way more Zs. The surrounding
    // quotes add 2 characters, so a body of (max_line_length - 1) Zs makes the
    // whole literal exactly one character longer than the maximum.
    //
    // `str::repeat` builds the body in a single allocation instead of
    // collecting tens of thousands of one-character `String`s.
    let too_long_str_body = "Z".repeat(max_line_length - 1);
    let too_long_str = format!("\"{}\"", too_long_str_body);

    // Make sure it's longer than our maximum line length
    assert_eq!(too_long_str.len(), max_line_length + 1);

    assert_parsing_fails(
        &too_long_str,
        FailReason::LineTooLong(0),
        Attempting::Expression,
    );
}
// NUMBER LITERALS // NUMBER LITERALS
#[test] #[test]
@ -259,4 +296,55 @@ mod test_parser {
assert_parses_to("-42", Int(-42)); assert_parses_to("-42", Int(-42));
assert_parses_to(&std::i64::MIN.to_string(), Int(std::i64::MIN)); assert_parses_to(&std::i64::MIN.to_string(), Int(std::i64::MIN));
} }
// fn expect_parsed_float<'a>(expected: f64, actual: &str) {
// assert_eq!(
// Ok((Float(expected), "".to_string())),
// parse_without_loc(actual)
// );
// }
// fn expect_parsed_int<'a>(expected: i64, actual: &str) {
// assert_eq!(
// Ok((Int(expected), "".to_string())),
// parse_without_loc(actual)
// );
// }
// #[test]
// fn positive_int() {
// expect_parsed_int(1234, "1234");
// }
// #[test]
// fn negative_int() {
// expect_parsed_int(-1234, "-1234");
// }
// #[test]
// fn positive_float() {
// expect_parsed_float(123.45, "123.45");
// expect_parsed_float(42.00, "42.00");
// }
// #[test]
// fn negative_float() {
// expect_parsed_float(-1234.567, "-1234.567");
// expect_parsed_float(-192.0, "-192.0");
// }
// #[test]
// fn ints_with_underscores() {
// expect_parsed_int(987654321, "987_6_5_432_1");
// expect_parsed_int(-1234567890, "-1_234_567_890");
// }
// #[test]
// fn fracs_with_spaces() {
// expect_parsed_float(-1234.567, "-1_23_4.567");
// expect_parsed_float(-192.0, "-19_2.0");
// expect_parsed_float(123.45, "1_2_3.45");
// expect_parsed_float(42.00, "4_2.00");
// }
} }