Add pretty reporting of errors

Most of the pretty-printing code is adapted from Squiller, which I wrote and own the copyright to. It is licensed under Apache 2.0 as well. This makes it much easier to debug the parser, because I can now *see* where it runs into an error. Right now the failure is something related to field access; maybe I forgot to consume the token.
2025-12-23 04:47:19 +00:00 · 2023-08-08 22:19:30 +02:00 · 2023-08-08 22:19:30 +02:00 · 5dbddeafbb
commit 5dbddeafbb
parent 277c685e47
6 changed files with 219 additions and 26 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -5,3 +5,12 @@ version = 3
 [[package]]
 name = "rcl"
 version = "0.0.0"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "unicode-width"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -4,3 +4,6 @@ version = "0.0.0"
 authors = ["Ruud van Asseldonk <dev@veniogames.com>"]
 edition = "2021"
 license = "Apache-2.0"
+
+[dependencies]
+unicode-width = "0.1.10"
--- a/src/error.rs
+++ b/src/error.rs
@ -7,28 +7,129 @@

 //! Error types.

-use crate::source::Span;
+use crate::source::{DocId, Inputs, Span};

-#[derive(Debug)]
-pub struct Error {
-    // Not dead, used in the debug impl.
-    #[allow(dead_code)]
-    message: &'static str,
+pub type Result<T> = std::result::Result<T, Box<dyn Error>>;
+
+pub trait Error: std::fmt::Debug {
+    /// The source location of the error.
+    fn span(&self) -> Span;
+
+    /// The error message.
+    ///
+    ///  * Shorter is better.
+    ///  * Simpler is better (no jargon).
+    ///  * The expected thing goes first, the actual thing goes second.
+    fn message(&self) -> &str;
+
+    /// Optionally, a note about the error.
+    ///
+    /// For example, an unmatched parenthesis can point to the opening paren.
+    fn note(&self) -> Option<(&str, Span)>;
+
+    /// Optionally, a hint on how to fix the problem.
+    fn hint(&self) -> Option<&str>;
 }

-pub type Result<T> = std::result::Result<T, Error>;
+impl dyn Error {
+    pub fn print(&self, inputs: &Inputs) {
+        let bold_red = "\x1b[31;1m";
+        let bold_yellow = "\x1b[33;1m";
+        let reset = "\x1b[0m";

-impl Error {
-    // TODO: Add a better error type which records source span info etc.
-    pub fn new(message: &'static str) -> Error {
-        Error { message }
+        let highlight = highlight_span_in_line(inputs, self.span(), bold_red);
+        eprint!("{}", highlight);
+        eprintln!("{}Error:{} {}", bold_red, reset, self.message());
+
+        if let Some((note, note_span)) = self.note() {
+            let highlight = highlight_span_in_line(inputs, note_span, bold_yellow);
+            eprint!("\n{}", highlight);
+            eprintln!("{}Note:{} {}", bold_yellow, reset, note);
+        }
+
+        if let Some(hint) = self.hint() {
+            eprintln!("\n{}Hint:{} {}", bold_yellow, reset, hint);
+        }
    }
 }

-impl From<&'static str> for Error {
-    fn from(message: &'static str) -> Error {
-        Error { message }
+fn highlight_span_in_line(inputs: &Inputs, span: Span, highlight_ansi: &str) -> String {
+    use std::cmp;
+    use std::fmt::Write;
+    use unicode_width::UnicodeWidthStr;
+
+    let doc = &inputs[span.doc.0 as usize];
+    let input = doc.data;
+
+    // Locate the line that contains the error.
+    let mut line = 1;
+    let mut line_start = 0;
+    let mut line_end = 0;
+    for (&c, i) in input.as_bytes().iter().zip(0..) {
+        if i == span.start {
+            break;
+        }
+        if c == b'\n' {
+            line += 1;
+            line_start = i + 1;
+        }
    }
+    for (&c, i) in input.as_bytes()[line_start..].iter().zip(line_start..) {
+        if c == b'\n' {
+            line_end = i;
+            break;
+        }
+    }
+    if line_end <= line_start {
+        line_end = input.len();
+    }
+
+    let line_content = &input[line_start..line_end];
+
+    // The length of the mark can be longer than the line, for example when
+    // the token to mark is a multiline string literal. In that case, highlight
+    // only up to the newline; don't extend the tildes too far.
+    let indent_content = &line_content[..span.start - line_start];
+    let as_of_error = &line_content[span.start - line_start..];
+    let error_content = &as_of_error[..cmp::min(span.len as usize, as_of_error.len())];
+
+    // The width of the error is not necessarily the number of bytes,
+    // measure the Unicode width of the span to underline.
+    let indent_width = indent_content.width();
+    let mark_width = cmp::max(1, error_content.width());
+
+    let line_num_str = line.to_string();
+    let line_num_pad: String = line_num_str.chars().map(|_| ' ').collect();
+    let mark_indent: String = " ".repeat(indent_width);
+    let mark_under: String = "~".repeat(mark_width);
+
+    let reset = "\x1b[0m";
+
+    let mut result = String::new();
+    // Note, the unwraps here are safe because writing to a string does not fail.
+    writeln!(
+        &mut result,
+        "{}--> {}:{}:{}",
+        line_num_pad,
+        doc.path,
+        line,
+        span.start - line_start
+    )
+    .unwrap();
+    writeln!(&mut result, "{} |", line_num_pad).unwrap();
+    writeln!(&mut result, "{} | {}", line_num_str, line_content).unwrap();
+    writeln!(
+        &mut result,
+        "{} | {}{}^{}{}",
+        line_num_pad,
+        mark_indent,
+        highlight_ansi,
+        &mark_under[1..],
+        reset
+    )
+    .unwrap();
+
+    result
 }

 /// A syntax error that causes lexing or parsing to fail.
@ -38,3 +139,56 @@ pub struct ParseError {
    pub message: &'static str,
    pub note: Option<(&'static str, Span)>,
 }
+
+impl From<ParseError> for Box<dyn Error> {
+    fn from(err: ParseError) -> Self {
+        Box::new(err)
+    }
+}
+
+impl Error for ParseError {
+    fn span(&self) -> Span {
+        self.span
+    }
+    fn message(&self) -> &str {
+        self.message
+    }
+    fn note(&self) -> Option<(&str, Span)> {
+        self.note
+    }
+    fn hint(&self) -> Option<&str> {
+        None
+    }
+}
+
+#[derive(Debug)]
+pub struct FixmeError {
+    // Not dead, used in the debug impl.
+    #[allow(dead_code)]
+    message: &'static str,
+}
+
+impl From<&'static str> for Box<dyn Error> {
+    fn from(err: &'static str) -> Self {
+        Box::new(FixmeError { message: err })
+    }
+}
+
+impl Error for FixmeError {
+    fn span(&self) -> Span {
+        Span {
+            doc: DocId(0),
+            start: 0,
+            len: 0,
+        }
+    }
+    fn message(&self) -> &str {
+        self.message
+    }
+    fn note(&self) -> Option<(&str, Span)> {
+        None
+    }
+    fn hint(&self) -> Option<&str> {
+        None
+    }
+}
--- a/src/eval.rs
+++ b/src/eval.rs
@ -11,7 +11,7 @@ use std::collections::BTreeMap;
 use std::rc::Rc;

 use crate::ast::{BinOp, Compr, Expr, Seq, UnOp};
-use crate::error::{Error, Result};
+use crate::error::Result;
 use crate::runtime::{Builtin, Env, Value};

 pub fn eval(env: &mut Env, expr: &Expr) -> Result<Rc<Value>> {
@ -100,7 +100,7 @@ pub fn eval(env: &mut Env, expr: &Expr) -> Result<Rc<Value>> {
                    };
                    match builtin {
                        Some(b) => Ok(Rc::new(Value::Builtin(b))),
-                        None => Err(Error::new("No such field in this list.")),
+                        None => Err("No such field in this list.".into()),
                    }
                }
                not_map => {
--- a/src/main.rs
+++ b/src/main.rs
@ -2,8 +2,9 @@ use std::collections::BTreeMap;
 use std::rc::Rc;

 use rcl::ast::{BinOp, Compr, Expr, Ident, Seq, UnOp};
+use rcl::error::Result;
 use rcl::runtime::{Env, Value};
-use rcl::source::DocId;
+use rcl::source::{DocId, Document, Inputs};

 /// Helpers for constructing AST in code.
 pub fn var(name: Ident) -> Expr {
@ -176,6 +177,21 @@ fn example_env() -> Env {
    env
 }

+fn main_tags(inputs: &Inputs) -> rcl::error::Result<()> {
+    for (i, doc) in inputs.iter().enumerate() {
+        let id = DocId(i as u32);
+        let tokens = rcl::lexer::lex(id, doc.data)?;
+        for (token, span) in &tokens {
+            eprintln!("{span:?} {token:?}");
+        }
+
+        let cst = rcl::parser::parse(id, doc.data)?;
+        eprintln!("{cst:#?}");
+    }
+
+    Ok(())
+}
+
 fn main() {
    let expr = example_ast();
    let mut env = example_env();
@ -186,12 +202,14 @@ fn main() {
    rcl::json::format_json(result.as_ref(), &mut result_json).expect("Failed to format json.");
    println!("{}", result_json);

-    let data = std::fs::read_to_string("examples/tags.rcl").expect("Failed to load example.");
-    let tokens = rcl::lexer::lex(DocId(0), &data).expect("Failed to parse.");
-    for (token, span) in &tokens {
-        eprintln!("{span:?} {token:?}");
+    let fname = "examples/tags.rcl";
+    let data = std::fs::read_to_string(fname).expect("Failed to load example.");
+    let doc = Document {
+        path: fname,
+        data: &data,
+    };
+    let inputs = [doc];
+    if let Err(err) = main_tags(&inputs) {
+        err.print(&inputs);
    }
-
-    let cst = rcl::parser::parse(DocId(0), &data).expect("Failed to parse.");
-    eprintln!("{cst:#?}");
 }
--- a/src/source.rs
+++ b/src/source.rs
@ -7,8 +7,17 @@

 //! Types for dealing with input source code.

+/// A named input document.
+pub struct Document<'a> {
+    /// Path can be a file path, but also a name such as "stdin".
+    pub path: &'a str,
+
+    /// The contents of the file.
+    pub data: &'a str,
+}
+
 /// A list of input documents.
-pub type Inputs<'a> = [&'a str];
+pub type Inputs<'a> = [Document<'a>];

 /// The index of a document in the list of input files.
 #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
@ -57,7 +66,7 @@ impl<'a> Source<'a> for &'a str {

 impl<'a> Source<'a> for &Inputs<'a> {
    fn resolve(self, span: Span) -> &'a str {
-        let doc = self[span.doc.0 as usize];
+        let doc = self[span.doc.0 as usize].data;
        &doc[span.start..span.end()]
    }
 }