Introduce SourceError to represent an error + original source

This commit is contained in:
Joshua Warner 2022-01-01 14:40:03 -07:00
parent 8d6eb178b1
commit 5c1084c453
9 changed files with 96 additions and 48 deletions

View file

@ -8,7 +8,7 @@ use crate::ident::{lowercase_ident, unqualified_ident, uppercase_ident};
use crate::parser::Progress::{self, *};
use crate::parser::{
backtrackable, specialize, word1, word2, EEffects, EExposes, EHeader, EImports, EPackages,
EProvides, ERequires, ETypedIdent, Parser, SyntaxError,
EProvides, ERequires, ETypedIdent, Parser, SyntaxError, SourceError,
};
use crate::state::State;
use crate::string_literal;
@ -39,10 +39,10 @@ pub fn module_defs<'a>() -> impl Parser<'a, Vec<'a, Loc<Def<'a>>>, SyntaxError<'
pub fn parse_header<'a>(
arena: &'a bumpalo::Bump,
state: State<'a>,
) -> Result<(Module<'a>, State<'a>), EHeader<'a>> {
) -> Result<(Module<'a>, State<'a>), SourceError<'a, EHeader<'a>>> {
match header().parse(arena, state) {
Ok((_, module, state)) => Ok((module, state)),
Err((_, fail, _)) => Err(fail),
Err((_, fail, state)) => Err(SourceError::new(fail, &state)),
}
}
@ -167,6 +167,7 @@ fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>, ()> {
|_, mut state: State<'a>| match chomp_module_name(state.bytes()) {
Ok(name) => {
let width = name.len();
state.xyzlcol.column += width as u16;
state = state.advance(width);
Ok((MadeProgress, ModuleName::new(name), state))
@ -435,7 +436,7 @@ fn platform_requires<'a>() -> impl Parser<'a, PlatformRequires<'a>, ERequires<'a
#[inline(always)]
fn requires_rigids<'a>(
min_indent: u32,
min_indent: u16,
) -> impl Parser<'a, Collection<'a, Loc<Spaced<'a, PlatformRigid<'a>>>>, ERequires<'a>> {
collection_trailing_sep_e!(
word1(b'{', ERequires::ListStart),
@ -513,7 +514,7 @@ fn exposes_values<'a>() -> impl Parser<
}
fn spaces_around_keyword<'a, E>(
min_indent: u32,
min_indent: u16,
keyword: &'static str,
expectation: fn(Position) -> E,
space_problem: fn(crate::parser::BadInputError, Position) -> E,

View file

@ -225,19 +225,55 @@ pub fn bad_input_to_syntax_error<'a>(bad_input: BadInputError, pos: Position) ->
}
}
impl<'a> SyntaxError<'a> {
impl<'a, T> SourceError<'a, T> {
/// Builds a `SourceError` from `problem` and the parser `state` it occurred in.
///
/// The stored `bytes` are taken from `state.original_bytes()`, not from the
/// parser's current (advanced) view of the input.
pub fn new(problem: T, state: &State<'a>) -> Self {
    let bytes = state.original_bytes();
    Self { problem, bytes }
}
/// Transforms the wrapped problem with `f`, carrying the source bytes over unchanged.
pub fn map_problem<E>(self, f: impl FnOnce(T) -> E) -> SourceError<'a, E> {
    // Take `self` apart so each field can be moved independently.
    let SourceError { problem, bytes } = self;
    SourceError {
        problem: f(problem),
        bytes,
    }
}
pub fn into_parse_problem(
self,
filename: std::path::PathBuf,
prefix: &'a str,
bytes: &'a [u8],
) -> ParseProblem<'a, T> {
ParseProblem {
pos: Position::default(),
problem: self.problem,
filename,
bytes: self.bytes,
}
}
}
impl<'a> SyntaxError<'a> {
pub fn into_source_error(self, state: &State<'a>) -> SourceError<'a, SyntaxError<'a>> {
SourceError {
problem: self,
bytes: state.original_bytes(),
}
}
pub fn into_parse_problem(
self,
filename: std::path::PathBuf,
state: &State<'a>,
) -> ParseProblem<'a, SyntaxError<'a>> {
ParseProblem {
pos: Position::default(),
problem: self,
filename,
bytes,
prefix,
bytes: state.original_bytes(),
}
}
}
@ -561,14 +597,18 @@ pub enum ETypeInlineAlias {
ArgumentNotLowercase(Position),
}
/// An error value bundled with the source bytes it was reported against.
#[derive(Debug)]
pub struct SourceError<'a, T> {
/// The underlying error.
pub problem: T,
/// The input bytes for the error; `SourceError::new` fills this from
/// `State::original_bytes()`.
pub bytes: &'a [u8],
}
#[derive(Debug)]
pub struct ParseProblem<'a, T> {
pub pos: Position,
pub problem: T,
pub filename: std::path::PathBuf,
pub bytes: &'a [u8],
/// prefix is usually the header (for parse problems in the body), or empty
pub prefix: &'a str,
}
pub trait Parser<'a, Output, Error> {

View file

@ -7,11 +7,12 @@ use std::fmt;
#[derive(Clone)]
pub struct State<'a> {
/// The raw input bytes from the file.
/// Beware: bytes[0] always points to the current byte the parser is examining.
bytes: &'a [u8],
/// Beware: original_bytes[0] always points to the start of the file.
/// Use bytes()[0] to access the current byte the parser is inspecting
original_bytes: &'a [u8],
/// Length of the original input in bytes
input_len: usize,
/// Offset in original_bytes that the parser is currently inspecting
offset: usize,
/// Position of the start of the current line
line_start: Position,
@ -24,15 +25,19 @@ pub struct State<'a> {
impl<'a> State<'a> {
pub fn new(bytes: &'a [u8]) -> State<'a> {
State {
bytes,
input_len: bytes.len(),
original_bytes: bytes,
offset: 0,
line_start: Position::zero(),
indent_column: 0,
}
}
/// Returns the `original_bytes` slice stored in this state.
///
/// Unlike `bytes()`, the returned slice is not offset by the parser's
/// current position.
pub fn original_bytes(&self) -> &'a [u8] {
self.original_bytes
}
pub fn bytes(&self) -> &'a [u8] {
self.bytes
&self.original_bytes[self.offset..]
}
pub fn column(&self) -> u32 {
@ -43,26 +48,26 @@ impl<'a> State<'a> {
pub fn advance(&self, offset: usize) -> State<'a> {
let mut state = self.clone();
// debug_assert!(!state.bytes[..offset].iter().any(|b| *b == b'\n'));
state.bytes = &state.bytes[offset..];
state.offset += offset;
state
}
#[must_use]
pub fn advance_newline(&self) -> State<'a> {
let mut state = self.clone();
state.bytes = &state.bytes[1..];
state.offset += 1;
state.line_start = state.pos();
state
}
/// Returns the current position
pub const fn pos(&self) -> Position {
Position::new((self.input_len - self.bytes.len()) as u32)
Position::new(self.offset as u32)
}
/// Returns whether the parser has reached the end of the input
pub const fn has_reached_end(&self) -> bool {
self.bytes.is_empty()
self.offset == self.original_bytes.len()
}
/// Returns a Region corresponding to the current state, but
@ -88,9 +93,9 @@ impl<'a> fmt::Debug for State<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "State {{")?;
match std::str::from_utf8(self.bytes) {
match std::str::from_utf8(self.bytes()) {
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes())?,
}
write!(f, "\n\t(offset): {:?},", self.pos())?;

View file

@ -2,6 +2,7 @@ use crate::ast;
use crate::module::module_defs;
// use crate::module::module_defs;
use crate::parser::Parser;
use crate::parser::SourceError;
use crate::parser::SyntaxError;
use crate::state::State;
use bumpalo::collections::Vec as BumpVec;
@ -12,19 +13,21 @@ pub fn parse_expr_with<'a>(
arena: &'a Bump,
input: &'a str,
) -> Result<ast::Expr<'a>, SyntaxError<'a>> {
parse_loc_with(arena, input).map(|loc_expr| loc_expr.value)
parse_loc_with(arena, input)
.map(|loc_expr| loc_expr.value)
.map_err(|e| e.problem)
}
#[allow(dead_code)]
pub fn parse_loc_with<'a>(
arena: &'a Bump,
input: &'a str,
) -> Result<Loc<ast::Expr<'a>>, SyntaxError<'a>> {
) -> Result<Loc<ast::Expr<'a>>, SourceError<'a, SyntaxError<'a>>> {
let state = State::new(input.trim().as_bytes());
match crate::expr::test_parse_expr(0, arena, state) {
match crate::expr::test_parse_expr(0, arena, state.clone()) {
Ok(loc_expr) => Ok(loc_expr),
Err(fail) => Err(SyntaxError::Expr(fail)),
Err(fail) => Err(SyntaxError::Expr(fail).into_source_error(&state)),
}
}