Add pretty reporting of errors

Most of the pretty printing code is adapted from Squiller, which I wrote
and own the copyright to. It is licensed Apache 2.0 as well.

This now makes it much easier to debug the parser because I can *see*
where it runs into an error. Right now it's something related to field
access, maybe I forgot to consume the token.
This commit is contained in:
Ruud van Asseldonk 2023-08-08 22:19:30 +02:00
parent 277c685e47
commit 5dbddeafbb
6 changed files with 219 additions and 26 deletions

9
Cargo.lock generated
View file

@ -5,3 +5,12 @@ version = 3
[[package]]
name = "rcl"
version = "0.0.0"
dependencies = [
"unicode-width",
]
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"

View file

@ -4,3 +4,6 @@ version = "0.0.0"
authors = ["Ruud van Asseldonk <dev@veniogames.com>"]
edition = "2021"
license = "Apache-2.0"
[dependencies]
unicode-width = "0.1.10"

View file

@ -7,28 +7,129 @@
//! Error types.
use crate::source::Span;
use crate::source::{DocId, Inputs, Span};
#[derive(Debug)]
pub struct Error {
// Not dead, used in the debug impl.
#[allow(dead_code)]
message: &'static str,
pub type Result<T> = std::result::Result<T, Box<dyn Error>>;
/// An error that can be reported to the user together with its source location.
///
/// Implementors supply the location, a message, and optionally a note and a
/// hint; the `Debug` supertrait keeps boxed errors debug-printable.
pub trait Error: std::fmt::Debug {
/// The source location of the error.
fn span(&self) -> Span;
/// The error message.
///
/// * Shorter is better.
/// * Simpler is better (no jargon).
/// * The expected thing goes first, the actual thing goes second.
fn message(&self) -> &str;
/// Optionally, a note about the error, with the source location it refers to.
///
/// For example, an unmatched parenthesis can point to the opening paren.
fn note(&self) -> Option<(&str, Span)>;
/// Optionally, a hint on how to fix the problem.
fn hint(&self) -> Option<&str>;
}
pub type Result<T> = std::result::Result<T, Error>;
impl dyn Error {
pub fn print(&self, inputs: &Inputs) {
let bold_red = "\x1b[31;1m";
let bold_yellow = "\x1b[33;1m";
let reset = "\x1b[0m";
impl Error {
// TODO: Add a better error type which records source span info etc.
pub fn new(message: &'static str) -> Error {
Error { message }
let highlight = highlight_span_in_line(inputs, self.span(), bold_red);
eprint!("{}", highlight);
eprintln!("{}Error:{} {}", bold_red, reset, self.message());
if let Some((note, note_span)) = self.note() {
let highlight = highlight_span_in_line(inputs, note_span, bold_yellow);
eprint!("\n{}", highlight);
eprintln!("{}Note:{} {}", bold_yellow, reset, note);
}
if let Some(hint) = self.hint() {
eprintln!("\n{}Hint:{} {}", bold_yellow, reset, hint);
}
}
}
impl From<&'static str> for Error {
fn from(message: &'static str) -> Error {
Error { message }
/// Render the source line that contains `span`, with a marker line beneath it.
///
/// The returned string consists of four newline-terminated lines:
///
/// 1. a `--> path:line:column` header,
/// 2. an empty gutter line,
/// 3. the source line, prefixed with its line number,
/// 4. a `^~~~` marker under the spanned text, wrapped in `highlight_ansi` and
///    the ANSI reset sequence.
///
/// The line number counts from 1. The column is the byte offset of the span
/// from the start of its line, counting from 0.
///
/// # Panics
///
/// Panics if `span.doc` is not a valid index into `inputs`, or if `span.start`
/// lies past the end of the document or not on a character boundary.
fn highlight_span_in_line(inputs: &Inputs, span: Span, highlight_ansi: &str) -> String {
    use std::cmp;
    use std::fmt::Write;
    use unicode_width::UnicodeWidthStr;

    let doc = &inputs[span.doc.0 as usize];
    let input = doc.data;
    let bytes = input.as_bytes();

    // Locate the line that contains the error: count the newlines before the
    // span to get the line number, and start the line just after the last one.
    let before = &bytes[..span.start];
    let line = 1 + before.iter().filter(|&&b| b == b'\n').count();
    let line_start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);

    // The line runs up to the next newline, or to the end of the input when
    // there is none. An empty line (a newline right at `line_start`) must stay
    // empty rather than being mistaken for "no newline found".
    let line_end = bytes[line_start..]
        .iter()
        .position(|&b| b == b'\n')
        .map_or(input.len(), |offset| line_start + offset);
    let line_content = &input[line_start..line_end];

    // The length of the mark can be longer than the line, for example when the
    // token to mark was a multiline string literal. In that case, highlight
    // only up to the newline, don't extend the tildes too far.
    let column = span.start - line_start;
    let (indent_content, as_of_error) = line_content.split_at(column);
    let error_content = &as_of_error[..cmp::min(span.len as usize, as_of_error.len())];

    // The width of the error is not necessarily the number of bytes,
    // measure the Unicode width of the span to underline. Always mark at
    // least one column so that empty spans remain visible.
    let indent_width = indent_content.width();
    let mark_width = cmp::max(1, error_content.width());

    let line_num_str = line.to_string();
    let line_num_pad = " ".repeat(line_num_str.len());
    let mark_indent = " ".repeat(indent_width);
    // The first column of the mark is the caret, the remainder are tildes.
    let mark_tail = "~".repeat(mark_width - 1);

    let reset = "\x1b[0m";

    let mut result = String::new();
    writeln!(
        &mut result,
        "{}--> {}:{}:{}",
        line_num_pad, doc.path, line, column
    )
    .expect("Writing to a String does not fail.");
    writeln!(&mut result, "{} |", line_num_pad).expect("Writing to a String does not fail.");
    writeln!(&mut result, "{} | {}", line_num_str, line_content)
        .expect("Writing to a String does not fail.");
    writeln!(
        &mut result,
        "{} | {}{}^{}{}",
        line_num_pad, mark_indent, highlight_ansi, mark_tail, reset
    )
    .expect("Writing to a String does not fail.");

    result
}
/// A syntax error that causes lexing or parsing to fail.
@ -38,3 +139,56 @@ pub struct ParseError {
pub message: &'static str,
pub note: Option<(&'static str, Span)>,
}
impl From<ParseError> for Box<dyn Error> {
fn from(err: ParseError) -> Self {
Box::new(err)
}
}
impl Error for ParseError {
    /// The location stored in the `span` field.
    fn span(&self) -> Span {
        let location = self.span;
        location
    }

    /// The message stored in the `message` field.
    fn message(&self) -> &str {
        let text: &str = self.message;
        text
    }

    /// The note stored in the `note` field, if there is one.
    fn note(&self) -> Option<(&str, Span)> {
        let attached = self.note;
        attached
    }

    /// A `ParseError` never carries a hint.
    fn hint(&self) -> Option<&str> {
        Option::None
    }
}
/// A placeholder error that carries only a message.
///
/// It is what a bare `&'static str` becomes when it is converted into a boxed
/// `Error`.
#[derive(Debug)]
pub struct FixmeError {
    /// The error message; read by the `Error::message` implementation.
    message: &'static str,
}
impl From<&'static str> for Box<dyn Error> {
fn from(err: &'static str) -> Self {
Box::new(FixmeError { message: err })
}
}
impl Error for FixmeError {
    /// No location is recorded for this error, so this returns an empty span
    /// at offset 0 of document 0.
    fn span(&self) -> Span {
        let doc = DocId(0);
        Span {
            doc,
            start: 0,
            len: 0,
        }
    }

    /// The message this error was constructed with.
    fn message(&self) -> &str {
        let text: &str = self.message;
        text
    }

    /// A `FixmeError` never carries a note.
    fn note(&self) -> Option<(&str, Span)> {
        Option::None
    }

    /// A `FixmeError` never carries a hint.
    fn hint(&self) -> Option<&str> {
        Option::None
    }
}

View file

@ -11,7 +11,7 @@ use std::collections::BTreeMap;
use std::rc::Rc;
use crate::ast::{BinOp, Compr, Expr, Seq, UnOp};
use crate::error::{Error, Result};
use crate::error::Result;
use crate::runtime::{Builtin, Env, Value};
pub fn eval(env: &mut Env, expr: &Expr) -> Result<Rc<Value>> {
@ -100,7 +100,7 @@ pub fn eval(env: &mut Env, expr: &Expr) -> Result<Rc<Value>> {
};
match builtin {
Some(b) => Ok(Rc::new(Value::Builtin(b))),
None => Err(Error::new("No such field in this list.")),
None => Err("No such field in this list.".into()),
}
}
not_map => {

View file

@ -2,8 +2,9 @@ use std::collections::BTreeMap;
use std::rc::Rc;
use rcl::ast::{BinOp, Compr, Expr, Ident, Seq, UnOp};
use rcl::error::Result;
use rcl::runtime::{Env, Value};
use rcl::source::DocId;
use rcl::source::{DocId, Document, Inputs};
/// Helpers for constructing AST in code.
pub fn var(name: Ident) -> Expr {
@ -176,6 +177,21 @@ fn example_env() -> Env {
env
}
/// Lex and parse every input document, dumping the results to stderr.
///
/// For each document, first every token is printed together with its span,
/// then the parse result is pretty-printed. The first lexer or parser error
/// stops processing and is returned to the caller.
fn main_tags(inputs: &Inputs) -> rcl::error::Result<()> {
    for (index, document) in inputs.iter().enumerate() {
        // The document id is the position of the document in the inputs.
        let doc_id = DocId(index as u32);
        let source = document.data;

        let tokens = rcl::lexer::lex(doc_id, source)?;
        for (token, span) in &tokens {
            eprintln!("{span:?} {token:?}");
        }

        let cst = rcl::parser::parse(doc_id, source)?;
        eprintln!("{cst:#?}");
    }

    Ok(())
}
fn main() {
let expr = example_ast();
let mut env = example_env();
@ -186,12 +202,14 @@ fn main() {
rcl::json::format_json(result.as_ref(), &mut result_json).expect("Failed to format json.");
println!("{}", result_json);
let data = std::fs::read_to_string("examples/tags.rcl").expect("Failed to load example.");
let tokens = rcl::lexer::lex(DocId(0), &data).expect("Failed to parse.");
for (token, span) in &tokens {
eprintln!("{span:?} {token:?}");
let fname = "examples/tags.rcl";
let data = std::fs::read_to_string(fname).expect("Failed to load example.");
let doc = Document {
path: fname,
data: &data,
};
let inputs = [doc];
if let Err(err) = main_tags(&inputs) {
err.print(&inputs);
}
let cst = rcl::parser::parse(DocId(0), &data).expect("Failed to parse.");
eprintln!("{cst:#?}");
}

View file

@ -7,8 +7,17 @@
//! Types for dealing with input source code.
/// A named input document.
///
/// Both fields are borrowed string slices, so the type is cheap to copy.
#[derive(Copy, Clone, Debug)]
pub struct Document<'a> {
    /// Path can be a file path, but also a name such as "stdin".
    pub path: &'a str,

    /// The contents of the file.
    pub data: &'a str,
}
/// A list of input documents.
pub type Inputs<'a> = [&'a str];
pub type Inputs<'a> = [Document<'a>];
/// The index of a document in the list of input files.
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
@ -57,7 +66,7 @@ impl<'a> Source<'a> for &'a str {
impl<'a> Source<'a> for &Inputs<'a> {
fn resolve(self, span: Span) -> &'a str {
let doc = self[span.doc.0 as usize];
let doc = self[span.doc.0 as usize].data;
&doc[span.start..span.end()]
}
}