From 5dbddeafbbc1dd602d992afa925dbb7e6604a0ca Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 8 Aug 2023 22:19:30 +0200 Subject: [PATCH] Add pretty reporting of errors Most of the pretty printing code is adapted from Squiller, which I wrote and own the copyright to. It is licensed Apache 2.0 as well. This now makes it much easier to debug the parser because I can *see* where it runs into an error. Right now it's something related to field access, maybe I forgot to consume the token. --- Cargo.lock | 9 +++ Cargo.toml | 3 + src/error.rs | 182 ++++++++++++++++++++++++++++++++++++++++++++++---- src/eval.rs | 4 +- src/main.rs | 34 +++++++--- src/source.rs | 13 +++- 6 files changed, 219 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 13d2768..99d52fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,3 +5,12 @@ version = 3 [[package]] name = "rcl" version = "0.0.0" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" diff --git a/Cargo.toml b/Cargo.toml index d7262e3..f70bfda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,6 @@ version = "0.0.0" authors = ["Ruud van Asseldonk "] edition = "2021" license = "Apache-2.0" + +[dependencies] +unicode-width = "0.1.10" diff --git a/src/error.rs b/src/error.rs index 498e1e6..0b3e67b 100644 --- a/src/error.rs +++ b/src/error.rs @@ -7,28 +7,129 @@ //! Error types. -use crate::source::Span; +use crate::source::{DocId, Inputs, Span}; -#[derive(Debug)] -pub struct Error { - // Not dead, used in the debug impl. - #[allow(dead_code)] - message: &'static str, +pub type Result = std::result::Result>; + +pub trait Error: std::fmt::Debug { + /// The source location of the error. + fn span(&self) -> Span; + + /// The error message. + /// + /// * Shorter is better. + /// * Simpler is better (no jargon). + /// * The expected thing goes first, the actual thing goes second. + fn message(&self) -> &str; + + /// Optionally, a note about error. + /// + /// For example, an unmatched parenthesis can point to the opening paren. + fn note(&self) -> Option<(&str, Span)>; + + /// Optionally, a hint on how to fix the problem. + fn hint(&self) -> Option<&str>; } -pub type Result = std::result::Result; +impl dyn Error { + pub fn print(&self, inputs: &Inputs) { + let bold_red = "\x1b[31;1m"; + let bold_yellow = "\x1b[33;1m"; + let reset = "\x1b[0m"; -impl Error { - // TODO: Add a better error type which records source span info etc. - pub fn new(message: &'static str) -> Error { - Error { message } + let highlight = highlight_span_in_line(inputs, self.span(), bold_red); + eprint!("{}", highlight); + eprintln!("{}Error:{} {}", bold_red, reset, self.message()); + + if let Some((note, note_span)) = self.note() { + let highlight = highlight_span_in_line(inputs, note_span, bold_yellow); + eprint!("\n{}", highlight); + eprintln!("{}Note:{} {}", bold_yellow, reset, note); + } + + if let Some(hint) = self.hint() { + eprintln!("\n{}Hint:{} {}", bold_yellow, reset, hint); + } } } -impl From<&'static str> for Error { - fn from(message: &'static str) -> Error { - Error { message } +fn highlight_span_in_line(inputs: &Inputs, span: Span, highlight_ansi: &str) -> String { + use std::cmp; + use std::fmt::Write; + use unicode_width::UnicodeWidthStr; + + let doc = &inputs[span.doc.0 as usize]; + let input = doc.data; + + // Locate the line that contains the error. + let mut line = 1; + let mut line_start = 0; + let mut line_end = 0; + for (&c, i) in input.as_bytes().iter().zip(0..) { + if i == span.start { + break; + } + if c == b'\n' { + line += 1; + line_start = i + 1; + } } + for (&c, i) in input.as_bytes()[line_start..].iter().zip(line_start..) { + if c == b'\n' { + line_end = i; + break; + } + } + if line_end <= line_start { + line_end = input.len(); + } + + let line_content = &input[line_start..line_end]; + + // The length of the mark can be longer than the line, for example when + // token to mark was a multiline string literal. In that case, highlight + // only up to the newline, don't extend the tildes too far. + let indent_content = &line_content[..span.start - line_start]; + let as_of_error = &line_content[span.start - line_start..]; + let error_content = &as_of_error[..cmp::min(span.len as usize, as_of_error.len())]; + + // The width of the error is not necessarily the number of bytes, + // measure the Unicode width of the span to underline. + let indent_width = indent_content.width(); + let mark_width = cmp::max(1, error_content.width()); + + let line_num_str = line.to_string(); + let line_num_pad: String = line_num_str.chars().map(|_| ' ').collect(); + let mark_indent: String = " ".repeat(indent_width); + let mark_under: String = "~".repeat(mark_width); + + let reset = "\x1b[0m"; + + let mut result = String::new(); + // Note, the unwraps here are safe because writing to a string does not fail. + writeln!( + &mut result, + "{}--> {}:{}:{}", + line_num_pad, + doc.path, + line, + span.start - line_start + ) + .unwrap(); + writeln!(&mut result, "{} |", line_num_pad).unwrap(); + writeln!(&mut result, "{} | {}", line_num_str, line_content).unwrap(); + writeln!( + &mut result, + "{} | {}{}^{}{}", + line_num_pad, + mark_indent, + highlight_ansi, + &mark_under[1..], + reset + ) + .unwrap(); + + result } /// A syntax error that causes lexing or parsing to fail. @@ -38,3 +139,56 @@ pub struct ParseError { pub message: &'static str, pub note: Option<(&'static str, Span)>, } + +impl From for Box { + fn from(err: ParseError) -> Self { + Box::new(err) + } +} + +impl Error for ParseError { + fn span(&self) -> Span { + self.span + } + fn message(&self) -> &str { + self.message + } + fn note(&self) -> Option<(&str, Span)> { + self.note + } + fn hint(&self) -> Option<&str> { + None + } +} + +#[derive(Debug)] +pub struct FixmeError { + // Not dead, used in the debug impl. + #[allow(dead_code)] + message: &'static str, +} + +impl From<&'static str> for Box { + fn from(err: &'static str) -> Self { + Box::new(FixmeError { message: err }) + } +} + +impl Error for FixmeError { + fn span(&self) -> Span { + Span { + doc: DocId(0), + start: 0, + len: 0, + } + } + fn message(&self) -> &str { + self.message + } + fn note(&self) -> Option<(&str, Span)> { + None + } + fn hint(&self) -> Option<&str> { + None + } +} diff --git a/src/eval.rs b/src/eval.rs index 5c3a942..7b4327f 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -11,7 +11,7 @@ use std::collections::BTreeMap; use std::rc::Rc; use crate::ast::{BinOp, Compr, Expr, Seq, UnOp}; -use crate::error::{Error, Result}; +use crate::error::Result; use crate::runtime::{Builtin, Env, Value}; pub fn eval(env: &mut Env, expr: &Expr) -> Result> { @@ -100,7 +100,7 @@ pub fn eval(env: &mut Env, expr: &Expr) -> Result> { }; match builtin { Some(b) => Ok(Rc::new(Value::Builtin(b))), - None => Err(Error::new("No such field in this list.")), + None => Err("No such field in this list.".into()), } } not_map => { diff --git a/src/main.rs b/src/main.rs index 74cf95d..6a08b0d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,8 +2,9 @@ use std::collections::BTreeMap; use std::rc::Rc; use rcl::ast::{BinOp, Compr, Expr, Ident, Seq, UnOp}; +use rcl::error::Result; use rcl::runtime::{Env, Value}; -use rcl::source::DocId; +use rcl::source::{DocId, Document, Inputs}; /// Helpers for constructing AST in code. pub fn var(name: Ident) -> Expr { @@ -176,6 +177,21 @@ fn example_env() -> Env { env } +fn main_tags(inputs: &Inputs) -> rcl::error::Result<()> { + for (i, doc) in inputs.iter().enumerate() { + let id = DocId(i as u32); + let tokens = rcl::lexer::lex(id, doc.data)?; + for (token, span) in &tokens { + eprintln!("{span:?} {token:?}"); + } + + let cst = rcl::parser::parse(id, doc.data)?; + eprintln!("{cst:#?}"); + } + + Ok(()) +} + fn main() { let expr = example_ast(); let mut env = example_env(); @@ -186,12 +202,14 @@ fn main() { rcl::json::format_json(result.as_ref(), &mut result_json).expect("Failed to format json."); println!("{}", result_json); - let data = std::fs::read_to_string("examples/tags.rcl").expect("Failed to load example."); - let tokens = rcl::lexer::lex(DocId(0), &data).expect("Failed to parse."); - for (token, span) in &tokens { - eprintln!("{span:?} {token:?}"); + let fname = "examples/tags.rcl"; + let data = std::fs::read_to_string(fname).expect("Failed to load example."); + let doc = Document { + path: fname, + data: &data, + }; + let inputs = [doc]; + if let Err(err) = main_tags(&inputs) { + err.print(&inputs); } - - let cst = rcl::parser::parse(DocId(0), &data).expect("Failed to parse."); - eprintln!("{cst:#?}"); } diff --git a/src/source.rs b/src/source.rs index d7f338d..ca78473 100644 --- a/src/source.rs +++ b/src/source.rs @@ -7,8 +7,17 @@ //! Types for dealing with input source code. +/// A named input document. +pub struct Document<'a> { + /// Path can be a file path, but also a name such as "stdin". + pub path: &'a str, + + /// The contents of the file. + pub data: &'a str, +} + /// A list of input documents. -pub type Inputs<'a> = [&'a str]; +pub type Inputs<'a> = [Document<'a>]; /// The index of a document in the list of input files. #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] @@ -57,7 +66,7 @@ impl<'a> Source<'a> for &'a str { impl<'a> Source<'a> for &Inputs<'a> { fn resolve(self, span: Span) -> &'a str { - let doc = self[span.doc.0 as usize]; + let doc = self[span.doc.0 as usize].data; &doc[span.start..span.end()] } }