[red-knot] Report line numbers in mdtest relative to the markdown file, not the test snippet (#13804)

Co-authored-by: Alex Waygood <alex.waygood@gmail.com> Co-authored-by: Micha Reiser <micha@reiser.io> Co-authored-by: Carl Meyer <carl@oddbird.net>
2025-10-03 15:14:42 +00:00 · 2024-10-22 03:42:40 -04:00 · 2024-10-22 03:42:40 -04:00 · cd6c937194
commit cd6c937194
parent 9d102799f9
4 changed files with 130 additions and 53 deletions
--- a/crates/red_knot_test/src/lib.rs
+++ b/crates/red_knot_test/src/lib.rs
@ -1,14 +1,13 @@
 use colored::Colorize;
 use parser as test_parser;
 use red_knot_python_semantic::types::check_types;
-use ruff_db::files::{system_path_to_file, Files};
+use ruff_db::files::{system_path_to_file, File, Files};
 use ruff_db::parsed::parsed_module;
 use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
-use std::collections::BTreeMap;
+use ruff_source_file::LineIndex;
 use ruff_text_size::TextSize;
 use std::path::Path;
 type Failures = BTreeMap<SystemPathBuf, matcher::FailuresByLine>;
 mod assertion;
 mod db;
 mod diagnostic;
@ -40,20 +39,24 @@ pub fn run(path: &Path, title: &str) {
            any_failures = true;
            println!("\n{}\n", test.name().bold().underline());
-            for (path, by_line) in failures {
+            let md_index = LineIndex::from_source_text(&source);
-                println!("{}", path.as_str().bold());
+
-                for (line_number, failures) in by_line.iter() {
+            for test_failures in failures {
                let backtick_line = md_index.line_index(test_failures.backtick_offset);
                for (relative_line_number, failures) in test_failures.by_line.iter() {
                    for failure in failures {
-                        let line_info = format!("line {line_number}:").cyan();
+                        let absolute_line_number =
                            backtick_line.checked_add(relative_line_number).unwrap();
                        let line_info = format!("{title}:{absolute_line_number}").cyan();
                        println!("    {line_info} {failure}");
                    }
                }
                println!();
            }
        }
    }
-    println!("{}\n", "-".repeat(50));
+    println!("\n{}\n", "-".repeat(50));
    assert!(!any_failures, "Some tests failed.");
 }
@ -61,40 +64,69 @@ pub fn run(path: &Path, title: &str) {
 fn run_test(db: &mut db::Db, test: &parser::MarkdownTest) -> Result<(), Failures> {
    let workspace_root = db.workspace_root().to_path_buf();
-    let mut system_paths = vec![];
+    let test_files: Vec<_> = test
-
+        .files()
-    for file in test.files() {
+        .map(|embedded| {
            assert!(
-            matches!(file.lang, "py" | "pyi"),
+                matches!(embedded.lang, "py" | "pyi"),
                "Non-Python files not supported yet."
            );
-        let full_path = workspace_root.join(file.path);
+            let full_path = workspace_root.join(embedded.path);
-        db.write_file(&full_path, file.code).unwrap();
+            db.write_file(&full_path, embedded.code).unwrap();
-        system_paths.push(full_path);
+            let file = system_path_to_file(db, full_path).unwrap();
            TestFile {
                file,
                backtick_offset: embedded.md_offset,
            }
        })
        .collect();
-    let mut failures = BTreeMap::default();
+    let failures: Failures = test_files
-
+        .into_iter()
-    for path in system_paths {
+        .filter_map(|test_file| {
-        let file = system_path_to_file(db, path.clone()).unwrap();
+            let parsed = parsed_module(db, test_file.file);
        let parsed = parsed_module(db, file);
            // TODO allow testing against code with syntax errors
            assert!(
                parsed.errors().is_empty(),
-            "Python syntax errors in {}, {:?}: {:?}",
+                "Python syntax errors in {}, {}: {:?}",
                test.name(),
-            path,
+                test_file.file.path(db),
                parsed.errors()
            );
-        matcher::match_file(db, file, check_types(db, file)).unwrap_or_else(|line_failures| {
+            match matcher::match_file(db, test_file.file, check_types(db, test_file.file)) {
-            failures.insert(path, line_failures);
+                Ok(()) => None,
-        });
+                Err(line_failures) => Some(FileFailures {
                    backtick_offset: test_file.backtick_offset,
                    by_line: line_failures,
                }),
            }
        })
        .collect();
    if failures.is_empty() {
        Ok(())
    } else {
        Err(failures)
    }
 }
 type Failures = Vec<FileFailures>;
 /// The failures for a single embedded file in a test, grouped by line number.
 ///
 /// The line numbers in `by_line` are relative to the code block; the reporter
 /// adds them to the Markdown line of `backtick_offset` to print line numbers
 /// that refer to the Markdown file instead of the snippet.
 struct FileFailures {
    /// The offset of the backticks that start the code block in the Markdown file.
    backtick_offset: TextSize,
    /// The failures, keyed by line within the code block.
    by_line: matcher::FailuresByLine,
 }
 /// A file embedded in a Markdown test, after it has been written to the test
 /// database.
 struct TestFile {
    /// The database file resolved from the path the embedded code was written to.
    file: File,
    /// Offset of the backticks that start the code block in the Markdown file
    /// (copied from the embedded file's `md_offset`).
    backtick_offset: TextSize,
 }
--- a/crates/red_knot_test/src/parser.rs
+++ b/crates/red_knot_test/src/parser.rs
@ -1,8 +1,12 @@
 use std::sync::LazyLock;
 use memchr::memchr2;
 use regex::{Captures, Match, Regex};
 use ruff_index::{newtype_index, IndexVec};
 use rustc_hash::{FxHashMap, FxHashSet};
-use std::sync::LazyLock;
+
 use ruff_index::{newtype_index, IndexVec};
 use ruff_python_trivia::Cursor;
 use ruff_text_size::{TextLen, TextSize};
 /// Parse the Markdown `source` as a test suite with given `title`.
 pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
@ -132,6 +136,9 @@ pub(crate) struct EmbeddedFile<'s> {
    pub(crate) path: &'s str,
    pub(crate) lang: &'s str,
    pub(crate) code: &'s str,
    /// The offset of the backticks beginning the code block within the markdown file
    pub(crate) md_offset: TextSize,
 }
 /// Matches a sequence of `#` characters, followed by a title heading, followed by a newline.
@ -185,7 +192,9 @@ struct Parser<'s> {
    files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
    /// The unparsed remainder of the Markdown source.
-    unparsed: &'s str,
+    cursor: Cursor<'s>,
    source_len: TextSize,
    /// Stack of ancestor sections.
    stack: SectionStack,
@ -205,7 +214,8 @@ impl<'s> Parser<'s> {
        Self {
            sections,
            files: IndexVec::default(),
-            unparsed: source,
+            cursor: Cursor::new(source),
            source_len: source.text_len(),
            stack: SectionStack::new(root_section_id),
            current_section_files: None,
        }
@ -227,26 +237,23 @@ impl<'s> Parser<'s> {
    }
    fn parse_impl(&mut self) -> anyhow::Result<()> {
-        while let Some(position) = memchr2(b'`', b'#', self.unparsed.as_bytes()) {
+        while let Some(position) = memchr2(b'`', b'#', self.cursor.as_bytes()) {
-            let (before, after) = self.unparsed.split_at(position);
+            self.cursor.skip_bytes(position.saturating_sub(1));
            self.unparsed = after;
            // code blocks and headers must start on a new line.
-            if before.is_empty() || before.ends_with('\n') {
+            if position == 0 || self.cursor.eat_char('\n') {
-                let c = after.as_bytes()[0] as char;
+                match self.cursor.first() {
                match c {
                    '#' => {
-                        if let Some(find) = HEADER_RE.find(self.unparsed) {
+                        if let Some(find) = HEADER_RE.find(self.cursor.as_str()) {
                            self.parse_header(find.as_str())?;
-                            self.unparsed = &self.unparsed[find.end()..];
+                            self.cursor.skip_bytes(find.len());
                            continue;
                        }
                    }
                    '`' => {
-                        if let Some(captures) = CODE_RE.captures(self.unparsed) {
+                        if let Some(captures) = CODE_RE.captures(self.cursor.as_str()) {
                            self.parse_code_block(&captures)?;
-                            self.unparsed = &self.unparsed[captures.get(0).unwrap().end()..];
+                            self.cursor.skip_bytes(captures.get(0).unwrap().len());
                            continue;
                        }
                    }
@ -255,8 +262,8 @@ impl<'s> Parser<'s> {
            }
            // Skip to the end of the line
-            if let Some(position) = memchr::memchr(b'\n', self.unparsed.as_bytes()) {
+            if let Some(position) = memchr::memchr(b'\n', self.cursor.as_bytes()) {
-                self.unparsed = &self.unparsed[position + 1..];
+                self.cursor.skip_bytes(position);
            } else {
                break;
            }
@ -336,6 +343,8 @@ impl<'s> Parser<'s> {
                .unwrap_or_default(),
            // CODE_RE can't match without matches for 'lang' and 'code'.
            code: captures.name("code").unwrap().into(),
            md_offset: self.offset(),
        });
        if let Some(current_files) = &mut self.current_section_files {
@ -368,6 +377,11 @@ impl<'s> Parser<'s> {
            self.current_section_files = None;
        }
    }
    /// Returns how far the cursor has advanced into the Markdown source.
    ///
    /// Computed as the total source length minus the length of the text the
    /// cursor has not consumed yet.
    fn offset(&self) -> TextSize {
        let remaining = self.cursor.text_len();
        self.source_len - remaining
    }
 }
 #[cfg(test)]
--- a/crates/ruff_python_trivia/src/cursor.rs
+++ b/crates/ruff_python_trivia/src/cursor.rs
@ -26,6 +26,16 @@ impl<'a> Cursor<'a> {
        self.chars.clone()
    }
    /// Returns the not-yet-consumed input as a byte slice.
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a str = self.chars.as_str();
        remaining.as_bytes()
    }
    /// Returns the remaining (not yet consumed) input as a string slice.
    ///
    /// The slice borrows from the underlying source (lifetime `'a`), not from
    /// the cursor itself.
    pub fn as_str(&self) -> &'a str {
        self.chars.as_str()
    }
    /// Peeks the next character from the input stream without consuming it.
    /// Returns [`EOF_CHAR`] if the file is at the end of the file.
    pub fn first(&self) -> char {
@ -110,4 +120,13 @@ impl<'a> Cursor<'a> {
            self.bump_back();
        }
    }
    /// Advances the cursor past the next `count` bytes of input.
    ///
    /// ## Panics
    ///  - If `count` exceeds the number of bytes remaining in the input stream.
    ///  - If `count` does not fall on a character boundary, i.e. it points into
    ///    the middle of a multi-byte character.
    pub fn skip_bytes(&mut self, count: usize) {
        let remaining = self.chars.as_str();
        // String slicing performs the bounds and char-boundary checks.
        self.chars = remaining[count..].chars();
    }
 }
--- a/crates/ruff_source_file/src/line_index.rs
+++ b/crates/ruff_source_file/src/line_index.rs
@ -394,6 +394,18 @@ impl OneIndexed {
            None => Self::MIN,
        }
    }
    /// Checked addition. Returns `None` if overflow occurred.
    #[must_use]
    pub fn checked_add(self, rhs: Self) -> Option<Self> {
        self.0.checked_add(rhs.0.get()).map(Self)
    }
    /// Checked subtraction of the underlying value of `rhs` from that of `self`.
    ///
    /// Returns `None` if the subtraction underflows, or if `Self::new` rejects
    /// the difference (presumably when it is zero — confirm against `new`).
    #[must_use]
    pub fn checked_sub(self, rhs: Self) -> Option<Self> {
        let difference = self.0.get().checked_sub(rhs.get())?;
        Self::new(difference)
    }
 }
 impl fmt::Display for OneIndexed {