[red-knot] type inference/checking test framework (#13636)

## Summary Adds a markdown-based test framework for writing tests of type inference and type checking. Fixes #11664. Implements the basic required features. A markdown test file is a suite of tests; each test can contain one or more Python files, with optionally specified path/name. The test writes all files to an in-memory file system, runs red-knot, and matches the resulting diagnostics against `# revealed: ` and `# error: ` assertions embedded in the Python source as comments. We will want to add features like incremental tests, setting custom configuration for tests, writing non-Python files, testing syntax errors, capturing full diagnostic output, etc. There's also plenty of room for improved UX (colored output?). ## Test Plan Lots of tests! Sample of the current output when a test fails: ``` Running tests/inference.rs (target/debug/deps/inference-7c96590aa84de2a4) running 1 test test inference::path_1_resources_inference_numbers_md ... FAILED failures: ---- inference::path_1_resources_inference_numbers_md stdout ---- inference/numbers.md - Numbers - Floats /src/test.py line 2: unexpected error: [invalid-assignment] "Object of type `Literal["str"]` is not assignable to `int`" thread 'inference::path_1_resources_inference_numbers_md' panicked at crates/red_knot_test/src/lib.rs:60:5: Some tests failed. note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace failures: inference::path_1_resources_inference_numbers_md test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.19s error: test failed, to rerun pass `-p red_knot_test --test inference` ``` --------- Co-authored-by: Micha Reiser <micha@reiser.io> Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
2025-08-04 18:58:04 +00:00 · 2024-10-08 12:33:19 -07:00 · 2024-10-08 12:33:19 -07:00 · 93eff7f174
commit 93eff7f174
parent fc661e193a
17 changed files with 2504 additions and 25 deletions
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@ -148,7 +148,7 @@ jobs:
      # sync, not just public items. Eventually we should do this for all
      # crates; for now add crates here as they are warning-clean to prevent
      # regression.
-      - run: cargo doc --no-deps -p red_knot_python_semantic -p red_knot -p ruff_db --document-private-items
+      - run: cargo doc --no-deps -p red_knot_python_semantic -p red_knot -p red_knot_test -p ruff_db --document-private-items
        env:
          # Setting RUSTDOCFLAGS because `cargo doc --check` isn't yet implemented (https://github.com/rust-lang/cargo/issues/10025).
          RUSTDOCFLAGS: "-D warnings"
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2084,7 +2084,9 @@ dependencies = [
 "insta",
 "itertools 0.13.0",
 "ordermap",
+ "red_knot_test",
 "red_knot_vendored",
+ "rstest",
 "ruff_db",
 "ruff_index",
 "ruff_python_ast",
@ -2127,6 +2129,25 @@ dependencies = [
 "tracing-subscriber",
 ]

+[[package]]
+name = "red_knot_test"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "once_cell",
+ "red_knot_python_semantic",
+ "red_knot_vendored",
+ "regex",
+ "ruff_db",
+ "ruff_index",
+ "ruff_python_trivia",
+ "ruff_source_file",
+ "ruff_text_size",
+ "rustc-hash 2.0.0",
+ "salsa",
+ "smallvec",
+]
+
 [[package]]
 name = "red_knot_vendored"
 version = "0.0.0"
@ -2247,6 +2268,12 @@ version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"

+[[package]]
+name = "relative-path"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
+
 [[package]]
 name = "ring"
 version = "0.17.8"
@ -2262,6 +2289,33 @@ dependencies = [
 "windows-sys 0.52.0",
 ]

+[[package]]
+name = "rstest"
+version = "0.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936"
+dependencies = [
+ "rstest_macros",
+ "rustc_version",
+]
+
+[[package]]
+name = "rstest_macros"
+version = "0.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42"
+dependencies = [
+ "cfg-if",
+ "glob",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "relative-path",
+ "rustc_version",
+ "syn",
+ "unicode-ident",
+]
+
 [[package]]
 name = "ruff"
 version = "0.6.9"
@ -2885,6 +2939,15 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"

+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
 [[package]]
 name = "rustix"
 version = "0.38.37"
@ -3030,6 +3093,12 @@ version = "4.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"

+[[package]]
+name = "semver"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
+
 [[package]]
 name = "serde"
 version = "1.0.210"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -39,6 +39,7 @@ ruff_workspace = { path = "crates/ruff_workspace" }

 red_knot_python_semantic = { path = "crates/red_knot_python_semantic" }
 red_knot_server = { path = "crates/red_knot_server" }
+red_knot_test = { path = "crates/red_knot_test" }
 red_knot_workspace = { path = "crates/red_knot_workspace", default-features = false }

 aho-corasick = { version = "1.1.3" }
@ -114,6 +115,7 @@ quote = { version = "1.0.23" }
 rand = { version = "0.8.5" }
 rayon = { version = "1.10.0" }
 regex = { version = "1.10.2" }
+rstest = { version = "0.22.0", default-features = false }
 rustc-hash = { version = "2.0.0" }
 salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "4a7c955255e707e64e43f3ce5eabb771ae067768" }
 schemars = { version = "0.8.16" }
--- a/crates/red_knot_python_semantic/Cargo.toml
+++ b/crates/red_knot_python_semantic/Cargo.toml
@ -38,10 +38,12 @@ test-case = { workspace = true }
 [dev-dependencies]
 ruff_db = { workspace = true, features = ["os", "testing"] }
 ruff_python_parser = { workspace = true }
+red_knot_test = { workspace = true }
 red_knot_vendored = { workspace = true }

 anyhow = { workspace = true }
 insta = { workspace = true }
+rstest = { workspace = true }
 tempfile = { workspace = true }

 [lints]
--- a/crates/red_knot_python_semantic/resources/mdtest/numbers.md
+++ b/crates/red_knot_python_semantic/resources/mdtest/numbers.md
@ -0,0 +1,35 @@
+# Numbers
+
+## Integers
+
+### Literals
+
+We can infer an integer literal type:
+
+```py
+reveal_type(1)  # revealed: Literal[1]
+```
+
+### Overflow
+
+We only track integer literals within the range of an i64:
+
+```py
+reveal_type(9223372036854775808)  # revealed: int
+```
+
+## Floats
+
+There aren't literal float types, but we infer the general float type:
+
+```py
+reveal_type(1.0)  # revealed: float
+```
+
+## Complex
+
+Same for complex:
+
+```py
+reveal_type(2j)  # revealed: complex
+```
--- a/crates/red_knot_python_semantic/src/types.rs
+++ b/crates/red_knot_python_semantic/src/types.rs
@ -17,7 +17,7 @@ use crate::types::narrow::narrowing_constraint;
 use crate::{Db, FxOrderSet, Module};

 pub(crate) use self::builder::{IntersectionBuilder, UnionBuilder};
-pub(crate) use self::diagnostic::TypeCheckDiagnostics;
+pub use self::diagnostic::{TypeCheckDiagnostic, TypeCheckDiagnostics};
 pub(crate) use self::display::TypeArrayDisplay;
 pub(crate) use self::infer::{
    infer_deferred_types, infer_definition_types, infer_expression_types, infer_scope_types,
--- a/crates/red_knot_python_semantic/src/types/infer.rs
+++ b/crates/red_knot_python_semantic/src/types/infer.rs
@ -3683,28 +3683,6 @@ mod tests {
        Ok(())
    }

-    #[test]
-    fn number_literal() -> anyhow::Result<()> {
-        let mut db = setup_db();
-
-        db.write_dedented(
-            "src/a.py",
-            "
-            a = 1
-            b = 9223372036854775808
-            c = 1.45
-            d = 2j
-            ",
-        )?;
-
-        assert_public_ty(&db, "src/a.py", "a", "Literal[1]");
-        assert_public_ty(&db, "src/a.py", "b", "int");
-        assert_public_ty(&db, "src/a.py", "c", "float");
-        assert_public_ty(&db, "src/a.py", "d", "complex");
-
-        Ok(())
-    }
-
    #[test]
    fn negated_int_literal() -> anyhow::Result<()> {
        let mut db = setup_db();
--- a/crates/red_knot_python_semantic/tests/mdtest.rs
+++ b/crates/red_knot_python_semantic/tests/mdtest.rs
@ -0,0 +1,13 @@
+use red_knot_test::run;
+use std::path::PathBuf;
+
+// Runs the markdown test file at `path` through `red_knot_test::run`.
+//
+// `path` is supplied by the rstest `#[files]` glob below. The title passed to `run` is `path`
+// relative to this crate's `resources/mdtest` directory.
+#[rstest::rstest]
+fn mdtest(#[files("resources/mdtest/**/*.md")] path: PathBuf) {
+    // `strip_prefix` below only succeeds if `path` starts with this canonicalized directory;
+    // every `unwrap` here panics (failing the test) otherwise.
+    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("resources")
+        .join("mdtest")
+        .canonicalize()
+        .unwrap();
+    let title = path.strip_prefix(crate_dir).unwrap();
+    // The final `unwrap` panics if the relative path is not valid UTF-8.
+    run(&path, title.as_os_str().to_str().unwrap());
+}
--- a/crates/red_knot_test/Cargo.toml
+++ b/crates/red_knot_test/Cargo.toml
@ -0,0 +1,32 @@
+[package]
+name = "red_knot_test"
+version = "0.0.0"
+publish = false
+edition.workspace = true
+rust-version.workspace = true
+homepage.workspace = true
+documentation.workspace = true
+repository.workspace = true
+authors.workspace = true
+license.workspace = true
+
+[dependencies]
+red_knot_python_semantic = { workspace = true }
+red_knot_vendored = { workspace = true }
+ruff_db = { workspace = true }
+ruff_index = { workspace = true }
+ruff_python_trivia = { workspace = true }
+ruff_source_file = { workspace = true }
+ruff_text_size = { workspace = true }
+
+anyhow = { workspace = true }
+once_cell = { workspace = true }
+regex = { workspace = true }
+rustc-hash = { workspace = true }
+salsa = { workspace = true }
+smallvec = { workspace = true }
+
+[dev-dependencies]
+
+[lints]
+workspace = true
--- a/crates/red_knot_test/src/assertion.rs
+++ b/crates/red_knot_test/src/assertion.rs
@ -0,0 +1,621 @@
+//! Parse type and type-error assertions in Python comment form.
+//!
+//! Parses comments of the form `# revealed: SomeType` and `# error: 8 [rule-code] "message text"`.
+//! In the latter case, `8` is the expected column, `[rule-code]` the expected rule code, and
+//! `"message text"` asserts that the diagnostic message contains the text `message text`; all
+//! three are optional (`# error:` will match any error).
+//!
+//! Assertion comments may be placed at end-of-line:
+//!
+//! ```py
+//! x: int = "foo"  # error: [invalid-assignment]
+//! ```
+//!
+//! Or as a full-line comment on the preceding line:
+//!
+//! ```py
+//! # error: [invalid-assignment]
+//! x: int = "foo"
+//! ```
+//!
+//! Multiple assertion comments may apply to the same line; in this case all (or all but the last)
+//! must be full-line comments:
+//!
+//! ```py
+//! # error: [unbound-name]
+//! reveal_type(x)  # revealed: Unbound
+//! ```
+//!
+//! or
+//!
+//! ```py
+//! # error: [unbound-name]
+//! # revealed: Unbound
+//! reveal_type(x)
+//! ```
+
+use crate::db::Db;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use ruff_db::files::File;
+use ruff_db::parsed::parsed_module;
+use ruff_db::source::{line_index, source_text, SourceText};
+use ruff_python_trivia::CommentRanges;
+use ruff_source_file::{LineIndex, Locator, OneIndexed};
+use ruff_text_size::{Ranged, TextRange};
+use smallvec::SmallVec;
+use std::ops::Deref;
+
+/// Diagnostic assertion comments in a single embedded file.
+///
+/// Construction only gathers the raw inputs; the comments are parsed into assertions when a
+/// reference to this struct is iterated.
+#[derive(Debug)]
+pub(crate) struct InlineFileAssertions {
+    /// Text ranges of the comments in the file, taken from the parsed module's tokens.
+    comment_ranges: CommentRanges,
+    /// Source text of the file.
+    source: SourceText,
+    /// Line index of the file, used to map text offsets to line numbers.
+    lines: LineIndex,
+}
+
+impl InlineFileAssertions {
+    /// Gather the source text, line index, and comment ranges of `file` from `db`.
+    pub(crate) fn from_file(db: &Db, file: File) -> Self {
+        let source = source_text(db, file);
+        let lines = line_index(db, file);
+        let parsed = parsed_module(db, file);
+        let comment_ranges = CommentRanges::from(parsed.tokens());
+        Self {
+            comment_ranges,
+            source,
+            lines,
+        }
+    }
+
+    /// Build a [`Locator`] over the source text, handing it a clone of the stored line index.
+    fn locator(&self) -> Locator {
+        Locator::with_index(&self.source, self.lines.clone())
+    }
+
+    /// The line number on which `range` starts.
+    fn line_number(&self, range: &impl Ranged) -> OneIndexed {
+        self.lines.line_index(range.start())
+    }
+
+    /// Whether the comment holding `ranged_assertion` is an own-line comment, as decided by
+    /// [`CommentRanges::is_own_line`] for the comment's start offset.
+    fn is_own_line_comment(&self, ranged_assertion: &AssertionWithRange) -> bool {
+        CommentRanges::is_own_line(ranged_assertion.start(), &self.locator())
+    }
+}
+
+// Iterating over `&InlineFileAssertions` yields the assertions grouped per line of code.
+impl<'a> IntoIterator for &'a InlineFileAssertions {
+    type Item = LineAssertions<'a>;
+    type IntoIter = LineAssertionsIterator<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        Self::IntoIter {
+            file_assertions: self,
+            // The per-comment iterator is made peekable because the grouping iterator needs to
+            // look at the following assertion before deciding whether to consume it.
+            inner: AssertionWithRangeIterator {
+                file_assertions: self,
+                inner: self.comment_ranges.into_iter(),
+            }
+            .peekable(),
+        }
+    }
+}
+
+/// An [`Assertion`] with the [`TextRange`] of its original inline comment.
+///
+/// Field `0` is the parsed assertion; field `1` is the range of the comment it was parsed from.
+#[derive(Debug)]
+struct AssertionWithRange<'a>(Assertion<'a>, TextRange);
+
+// Dereferences to the wrapped assertion, ignoring the range.
+impl<'a> Deref for AssertionWithRange<'a> {
+    type Target = Assertion<'a>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+// The range reported is that of the comment the assertion was parsed from.
+impl Ranged for AssertionWithRange<'_> {
+    fn range(&self) -> TextRange {
+        self.1
+    }
+}
+
+// Discards the range, keeping only the assertion.
+impl<'a> From<AssertionWithRange<'a>> for Assertion<'a> {
+    fn from(value: AssertionWithRange<'a>) -> Self {
+        value.0
+    }
+}
+
+/// Iterator that yields all assertions within a single embedded Python file.
+///
+/// Comments that do not parse as an assertion are skipped.
+#[derive(Debug)]
+struct AssertionWithRangeIterator<'a> {
+    /// The file whose comments are being scanned; provides the source text for slicing.
+    file_assertions: &'a InlineFileAssertions,
+    /// The comment ranges that have not been visited yet.
+    inner: std::iter::Copied<std::slice::Iter<'a, TextRange>>,
+}
+
+impl<'a> Iterator for AssertionWithRangeIterator<'a> {
+    type Item = AssertionWithRange<'a>;
+
+    /// Advance to the next comment that parses as an assertion, or return `None` once the
+    /// comment ranges are exhausted.
+    fn next(&mut self) -> Option<Self::Item> {
+        let locator = self.file_assertions.locator();
+        loop {
+            // `?` ends the iteration when no comment ranges are left.
+            let inner_next = self.inner.next()?;
+            let comment = locator.slice(inner_next);
+            if let Some(assertion) = Assertion::from_comment(comment) {
+                return Some(AssertionWithRange(assertion, inner_next));
+            };
+        }
+    }
+}
+
+// `next` only returns `None` when the underlying slice iterator does, and it keeps doing so.
+impl std::iter::FusedIterator for AssertionWithRangeIterator<'_> {}
+
+/// A vector of [`Assertion`]s belonging to a single line.
+///
+/// Most lines will have zero or one assertion, so we use a [`SmallVec`] optimized for a single
+/// element to avoid most heap vector allocations.
+type AssertionVec<'a> = SmallVec<[Assertion<'a>; 1]>;
+
+/// Iterator that groups the assertions of a file by the line of code they refer to.
+#[derive(Debug)]
+pub(crate) struct LineAssertionsIterator<'a> {
+    /// Used to look up the line number and own-line status of each assertion comment.
+    file_assertions: &'a InlineFileAssertions,
+    /// The individual assertions; peekable so `next` can inspect the following assertion
+    /// without consuming it.
+    inner: std::iter::Peekable<AssertionWithRangeIterator<'a>>,
+}
+
+impl<'a> Iterator for LineAssertionsIterator<'a> {
+    type Item = LineAssertions<'a>;
+
+    /// Yield the next group of assertions that refer to the same line of code, or `None` when
+    /// no assertions are left.
+    fn next(&mut self) -> Option<Self::Item> {
+        let file = self.file_assertions;
+        let ranged_assertion = self.inner.next()?;
+        let mut collector = AssertionVec::new();
+        // Starts as the line of the first comment in the group and is advanced as further
+        // comments are stacked onto it.
+        let mut line_number = file.line_number(&ranged_assertion);
+        // Collect all own-line comments on consecutive lines; these all apply to the same line of
+        // code. For example:
+        //
+        // ```py
+        // # error: [unbound-name]
+        // # revealed: Unbound
+        // reveal_type(x)
+        // ```
+        //
+        if file.is_own_line_comment(&ranged_assertion) {
+            collector.push(ranged_assertion.into());
+            // Stays `true` for as long as every collected comment is an own-line comment.
+            let mut only_own_line = true;
+            while let Some(ranged_assertion) = self.inner.peek() {
+                let next_line_number = line_number.saturating_add(1);
+                // Only an assertion on the immediately following line extends the stack.
+                if file.line_number(ranged_assertion) == next_line_number {
+                    if !file.is_own_line_comment(ranged_assertion) {
+                        only_own_line = false;
+                    }
+                    line_number = next_line_number;
+                    // Cannot fail: `peek` just returned `Some`.
+                    collector.push(self.inner.next().unwrap().into());
+                    // If we see an end-of-line comment, it has to be the end of the stack,
+                    // otherwise we'd botch this case, attributing all three errors to the `bar`
+                    // line:
+                    //
+                    // ```py
+                    // # error:
+                    // foo  # error:
+                    // bar  # error:
+                    // ```
+                    //
+                    if !only_own_line {
+                        break;
+                    }
+                } else {
+                    break;
+                }
+            }
+            if only_own_line {
+                // The collected comments apply to the _next_ line in the code.
+                line_number = line_number.saturating_add(1);
+            }
+        } else {
+            // We have a line-trailing comment; it applies to its own line, and is not grouped.
+            collector.push(ranged_assertion.into());
+        }
+        Some(LineAssertions {
+            line_number,
+            assertions: collector,
+        })
+    }
+}
+
+// `next` returns `None` only when the peekable inner iterator, itself fused, is exhausted.
+impl std::iter::FusedIterator for LineAssertionsIterator<'_> {}
+
+/// One or more assertions referring to the same line of code.
+#[derive(Debug)]
+pub(crate) struct LineAssertions<'a> {
+    /// The line these assertions refer to.
+    ///
+    /// Not necessarily the same line the assertion comment is located on; for an own-line comment,
+    /// it's the next non-assertion line.
+    pub(crate) line_number: OneIndexed,
+
+    /// The assertions referring to this line.
+    pub(crate) assertions: AssertionVec<'a>,
+}
+
+// Dereferences to the slice of assertions, so slice methods can be called on the group directly.
+impl<'a> Deref for LineAssertions<'a> {
+    type Target = [Assertion<'a>];
+
+    fn deref(&self) -> &Self::Target {
+        &self.assertions
+    }
+}
+
+/// Matches a whole `# revealed: <type>` comment.
+///
+/// The `ty_display` group captures the text after `revealed:` without surrounding whitespace.
+static TYPE_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^#\s*revealed:\s*(?<ty_display>.+?)\s*$").unwrap());
+
+/// Matches a whole `# error:` comment.
+///
+/// After `error:` it accepts, in this order and each optional: a run of digits (`column`), a
+/// bracketed rule code (`rule`), and a double-quoted message (`message`, captured without the
+/// quotes).
+static ERROR_RE: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(
+        r#"^#\s*error:(\s*(?<column>\d+))?(\s*\[(?<rule>.+?)\])?(\s*"(?<message>.+?)")?\s*$"#,
+    )
+    .unwrap()
+});
+
+/// A single diagnostic assertion comment.
+#[derive(Debug)]
+pub(crate) enum Assertion<'a> {
+    /// A `revealed: ` assertion.
+    Revealed(&'a str),
+
+    /// An `error: ` assertion.
+    Error(ErrorAssertion<'a>),
+}
+
+impl<'a> Assertion<'a> {
+    /// Parse the text of a comment into an assertion.
+    ///
+    /// `TYPE_RE` is tried first, then `ERROR_RE`; returns `None` when neither matches.
+    fn from_comment(comment: &'a str) -> Option<Self> {
+        if let Some(caps) = TYPE_RE.captures(comment) {
+            // `ty_display` is not optional in `TYPE_RE`, so it is present whenever the regex
+            // matches.
+            Some(Self::Revealed(caps.name("ty_display").unwrap().as_str()))
+        } else {
+            ERROR_RE.captures(comment).map(|caps| {
+                Self::Error(ErrorAssertion {
+                    rule: caps.name("rule").map(|m| m.as_str()),
+                    // NOTE(review): a column that matched `\d+` but fails to parse is silently
+                    // treated as "no column given" rather than reported.
+                    column: caps.name("column").and_then(|m| m.as_str().parse().ok()),
+                    message_contains: caps.name("message").map(|m| m.as_str()),
+                })
+            })
+        }
+    }
+}
+
+// Formats the assertion in the same form it is written in a comment, without the leading `#`.
+impl std::fmt::Display for Assertion<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Revealed(expected_type) => write!(f, "revealed: {expected_type}"),
+            Self::Error(assertion) => assertion.fmt(f),
+        }
+    }
+}
+
+/// An `error: ` assertion comment.
+///
+/// Every field is optional; a field that is `None` was not given in the comment.
+#[derive(Debug)]
+pub(crate) struct ErrorAssertion<'a> {
+    /// The diagnostic rule code we expect.
+    pub(crate) rule: Option<&'a str>,
+
+    /// The column we expect the diagnostic range to start at.
+    pub(crate) column: Option<OneIndexed>,
+
+    /// A string we expect to be contained in the diagnostic message.
+    pub(crate) message_contains: Option<&'a str>,
+}
+
+// Writes `error:` followed by whichever of column, `[rule]`, and `"message"` are present, in
+// that order, each preceded by a single space.
+impl std::fmt::Display for ErrorAssertion<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("error:")?;
+        if let Some(column) = self.column {
+            write!(f, " {column}")?;
+        }
+        if let Some(rule) = self.rule {
+            write!(f, " [{rule}]")?;
+        }
+        if let Some(message) = self.message_contains {
+            write!(f, r#" "{message}""#)?;
+        }
+        Ok(())
+    }
+}
+
+// Tests for parsing assertion comments and grouping them by the line they refer to.
+//
+// Every source snippet below starts with a newline right after the opening quote, which is why
+// the first line of code is expected at zero-indexed line 1.
+#[cfg(test)]
+mod tests {
+    use super::{Assertion, InlineFileAssertions, LineAssertions};
+    use ruff_db::files::system_path_to_file;
+    use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
+    use ruff_python_trivia::textwrap::dedent;
+    use ruff_source_file::OneIndexed;
+
+    /// Write `source` to `/src/test.py` in a fresh test database and collect its assertions.
+    fn get_assertions(source: &str) -> InlineFileAssertions {
+        let mut db = crate::db::Db::setup(SystemPathBuf::from("/src"));
+        db.write_file("/src/test.py", source).unwrap();
+        let file = system_path_to_file(&db, "/src/test.py").unwrap();
+        InlineFileAssertions::from_file(&db, file)
+    }
+
+    /// Collect the per-line assertion groups into a vector so they can be slice-matched.
+    fn as_vec(assertions: &InlineFileAssertions) -> Vec<LineAssertions> {
+        assertions.into_iter().collect()
+    }
+
+    // A trailing `revealed:` comment applies to the line it is on.
+    #[test]
+    fn ty_display() {
+        let assertions = get_assertions(&dedent(
+            "
+            reveal_type(1)  # revealed: Literal[1]
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(format!("{assert}"), "revealed: Literal[1]");
+    }
+
+    // A bare `error:` comment, with no column, rule, or message, is still an assertion.
+    #[test]
+    fn error() {
+        let assertions = get_assertions(&dedent(
+            "
+            x  # error:
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(format!("{assert}"), "error:");
+    }
+
+    // An own-line comment applies to the line after it.
+    #[test]
+    fn prior_line() {
+        let assertions = get_assertions(&dedent(
+            "
+            # revealed: Literal[1]
+            reveal_type(1)
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(format!("{assert}"), "revealed: Literal[1]");
+    }
+
+    // Own-line comments on consecutive lines are grouped and apply to the line after the stack.
+    #[test]
+    fn stacked_prior_line() {
+        let assertions = get_assertions(&dedent(
+            "
+            # revealed: Unbound
+            # error: [unbound-name]
+            reveal_type(x)
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(3));
+
+        let [assert1, assert2] = &line.assertions[..] else {
+            panic!("expected two assertions");
+        };
+
+        assert_eq!(format!("{assert1}"), "revealed: Unbound");
+        assert_eq!(format!("{assert2}"), "error: [unbound-name]");
+    }
+
+    // An own-line comment followed by a trailing comment on the next line: both apply to the
+    // line carrying the trailing comment.
+    #[test]
+    fn stacked_mixed() {
+        let assertions = get_assertions(&dedent(
+            "
+            # revealed: Unbound
+            reveal_type(x) # error: [unbound-name]
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
+
+        let [assert1, assert2] = &line.assertions[..] else {
+            panic!("expected two assertions");
+        };
+
+        assert_eq!(format!("{assert1}"), "revealed: Unbound");
+        assert_eq!(format!("{assert2}"), "error: [unbound-name]");
+    }
+
+    // An own-line comment and a later trailing comment that is not on the adjacent line form
+    // two separate groups.
+    #[test]
+    fn multiple_lines() {
+        let assertions = get_assertions(&dedent(
+            r#"
+            # error: [invalid-assignment]
+            x: int = "foo"
+            y  # error: [unbound-name]
+            "#,
+        ));
+
+        let [line1, line2] = &as_vec(&assertions)[..] else {
+            panic!("expected two lines");
+        };
+
+        assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(2));
+        assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(3));
+
+        let [Assertion::Error(error1)] = &line1.assertions[..] else {
+            panic!("expected one error assertion");
+        };
+
+        assert_eq!(error1.rule, Some("invalid-assignment"));
+
+        let [Assertion::Error(error2)] = &line2.assertions[..] else {
+            panic!("expected one error assertion");
+        };
+
+        assert_eq!(error2.rule, Some("unbound-name"));
+    }
+
+    // A stack ends at the first trailing comment; a trailing comment on the following line
+    // starts a new group instead of being attributed to the earlier stack.
+    #[test]
+    fn multiple_lines_mixed_stack() {
+        let assertions = get_assertions(&dedent(
+            r#"
+            # error: [invalid-assignment]
+            x: int = reveal_type("foo")  # revealed: str
+            y  # error: [unbound-name]
+            "#,
+        ));
+
+        let [line1, line2] = &as_vec(&assertions)[..] else {
+            panic!("expected two lines");
+        };
+
+        assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(2));
+        assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(3));
+
+        let [Assertion::Error(error1), Assertion::Revealed(expected_ty)] = &line1.assertions[..]
+        else {
+            panic!("expected one error assertion and one Revealed assertion");
+        };
+
+        assert_eq!(error1.rule, Some("invalid-assignment"));
+        assert_eq!(*expected_ty, "str");
+
+        let [Assertion::Error(error2)] = &line2.assertions[..] else {
+            panic!("expected one error assertion");
+        };
+
+        assert_eq!(error2.rule, Some("unbound-name"));
+    }
+
+    // `error:` with only a rule code.
+    #[test]
+    fn error_with_rule() {
+        let assertions = get_assertions(&dedent(
+            "
+            x  # error: [unbound-name]
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(format!("{assert}"), "error: [unbound-name]");
+    }
+
+    // `error:` with a column and a rule code.
+    #[test]
+    fn error_with_rule_and_column() {
+        let assertions = get_assertions(&dedent(
+            "
+            x  # error: 1 [unbound-name]
+            ",
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(format!("{assert}"), "error: 1 [unbound-name]");
+    }
+
+    // `error:` with a rule code and a quoted message.
+    #[test]
+    fn error_with_rule_and_message() {
+        let assertions = get_assertions(&dedent(
+            r#"
+            # error: [unbound-name] "`x` is unbound"
+            x
+            "#,
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(
+            format!("{assert}"),
+            r#"error: [unbound-name] "`x` is unbound""#
+        );
+    }
+
+    // `error:` with a column and a quoted message, but no rule code.
+    #[test]
+    fn error_with_message_and_column() {
+        let assertions = get_assertions(&dedent(
+            r#"
+            # error: 1 "`x` is unbound"
+            x
+            "#,
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(format!("{assert}"), r#"error: 1 "`x` is unbound""#);
+    }
+
+    // `error:` with all three parts: column, rule code, and quoted message.
+    #[test]
+    fn error_with_rule_and_message_and_column() {
+        let assertions = get_assertions(&dedent(
+            r#"
+            # error: 1 [unbound-name] "`x` is unbound"
+            x
+            "#,
+        ));
+
+        let [line] = &as_vec(&assertions)[..] else {
+            panic!("expected one line");
+        };
+
+        assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
+
+        let [assert] = &line.assertions[..] else {
+            panic!("expected one assertion");
+        };
+
+        assert_eq!(
+            format!("{assert}"),
+            r#"error: 1 [unbound-name] "`x` is unbound""#
+        );
+    }
+}
--- a/crates/red_knot_test/src/db.rs
+++ b/crates/red_knot_test/src/db.rs
@ -0,0 +1,88 @@
+use red_knot_python_semantic::{
+    Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings,
+};
+use ruff_db::files::{File, Files};
+use ruff_db::system::SystemPathBuf;
+use ruff_db::system::{DbWithTestSystem, System, TestSystem};
+use ruff_db::vendored::VendoredFileSystem;
+use ruff_db::{Db as SourceDb, Upcast};
+
+/// Salsa database used by the test framework.
+///
+/// It is backed by a [`TestSystem`] and the vendored file system from `red_knot_vendored`.
+#[salsa::db]
+pub(crate) struct Db {
+    storage: salsa::Storage<Self>,
+    files: Files,
+    system: TestSystem,
+    vendored: VendoredFileSystem,
+}
+
+impl Db {
+    /// Create a database whose search paths are rooted at `workspace_root`.
+    ///
+    /// The `workspace_root` directory is created in the test system's memory file system, and
+    /// the program is configured with the default Python version.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the directory cannot be created or the search path settings are rejected.
+    pub(crate) fn setup(workspace_root: SystemPathBuf) -> Self {
+        let db = Self {
+            storage: salsa::Storage::default(),
+            system: TestSystem::default(),
+            vendored: red_knot_vendored::file_system().clone(),
+            files: Files::default(),
+        };
+
+        db.memory_file_system()
+            .create_directory_all(&workspace_root)
+            .unwrap();
+
+        Program::from_settings(
+            &db,
+            &ProgramSettings {
+                target_version: PythonVersion::default(),
+                search_paths: SearchPathSettings::new(workspace_root),
+            },
+        )
+        .expect("Invalid search path settings");
+
+        db
+    }
+}
+
+// Exposes the test system.
+impl DbWithTestSystem for Db {
+    fn test_system(&self) -> &TestSystem {
+        &self.system
+    }
+
+    fn test_system_mut(&mut self) -> &mut TestSystem {
+        &mut self.system
+    }
+}
+
+// Plain accessors for the vendored file system, the system, and the file table.
+#[salsa::db]
+impl SourceDb for Db {
+    fn vendored(&self) -> &VendoredFileSystem {
+        &self.vendored
+    }
+
+    fn system(&self) -> &dyn System {
+        &self.system
+    }
+
+    fn files(&self) -> &Files {
+        &self.files
+    }
+}
+
+// Lets the database be used where a `dyn SourceDb` is expected.
+impl Upcast<dyn SourceDb> for Db {
+    fn upcast(&self) -> &(dyn SourceDb + 'static) {
+        self
+    }
+    fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
+        self
+    }
+}
+
+#[salsa::db]
+impl SemanticDb for Db {
+    // Every file whose path is not a vendored path is reported as open.
+    fn is_file_open(&self, file: File) -> bool {
+        !file.path(self).is_vendored_path()
+    }
+}
+
+#[salsa::db]
+impl salsa::Database for Db {
+    // Salsa events are ignored.
+    fn salsa_event(&self, _event: &dyn Fn() -> salsa::Event) {}
+}
--- a/crates/red_knot_test/src/diagnostic.rs
+++ b/crates/red_knot_test/src/diagnostic.rs
@ -0,0 +1,173 @@
+//! Sort and group diagnostics by line number, so they can be correlated with assertions.
+//!
+//! We don't assume that we will get the diagnostics in source order.
+
+use ruff_source_file::{LineIndex, OneIndexed};
+use ruff_text_size::Ranged;
+use std::ops::{Deref, Range};
+
+/// All diagnostics for one embedded Python file, sorted and grouped by start line number.
+///
+/// The diagnostics are kept in a flat vector, sorted by line number. A separate vector of
+/// [`LineDiagnosticRange`] has one entry for each contiguous slice of the diagnostics vector
+/// containing diagnostics which all start on the same line.
+#[derive(Debug)]
+pub(crate) struct SortedDiagnostics<T> {
+    /// Every diagnostic, ordered by the line on which it starts.
+    diagnostics: Vec<T>,
+    /// One entry per line that has diagnostics, in ascending line order; each entry indexes
+    /// into `diagnostics`.
+    line_ranges: Vec<LineDiagnosticRange>,
+}
+
+impl<T> SortedDiagnostics<T>
+where
+    T: Ranged + Clone,
+{
+    /// Sort `diagnostics` by the line on which each one starts, and group them by that line.
+    ///
+    /// `line_index` is used to translate each diagnostic's start offset into a line number.
+    /// Diagnostics that start on the same line keep the relative order in which they were
+    /// supplied, so first-match selection and the order of reported failures do not depend on
+    /// the internals of the sort.
+    pub(crate) fn new(diagnostics: impl IntoIterator<Item = T>, line_index: &LineIndex) -> Self {
+        let mut with_lines: Vec<_> = diagnostics
+            .into_iter()
+            .map(|diagnostic| DiagnosticWithLine {
+                line_number: line_index.line_index(diagnostic.start()),
+                diagnostic,
+            })
+            .collect();
+        // The key is only the line number, so a stable sort is needed to preserve the incoming
+        // order of diagnostics that share a line.
+        with_lines.sort_by_key(|entry| entry.line_number);
+
+        let mut sorted = Vec::with_capacity(with_lines.len());
+        let mut line_ranges: Vec<LineDiagnosticRange> = vec![];
+
+        for DiagnosticWithLine {
+            line_number,
+            diagnostic,
+        } in with_lines
+        {
+            let index = sorted.len();
+            match line_ranges.last_mut() {
+                // Same line as the previous diagnostic: grow the current range by one.
+                Some(last) if last.line_number == line_number => {
+                    last.diagnostic_index_range.end = index + 1;
+                }
+                // First diagnostic overall, or first diagnostic on a new line: open a new range.
+                _ => line_ranges.push(LineDiagnosticRange {
+                    line_number,
+                    diagnostic_index_range: index..index + 1,
+                }),
+            }
+            sorted.push(diagnostic);
+        }
+
+        Self {
+            diagnostics: sorted,
+            line_ranges,
+        }
+    }
+
+    /// Iterate over the diagnostics one line at a time, in ascending line order.
+    pub(crate) fn iter_lines(&self) -> LineDiagnosticsIterator<'_, T> {
+        LineDiagnosticsIterator {
+            diagnostics: self.diagnostics.as_slice(),
+            inner: self.line_ranges.iter(),
+        }
+    }
+}
+
+/// Range delineating diagnostics in [`SortedDiagnostics`] that begin on a single line.
+#[derive(Debug)]
+struct LineDiagnosticRange {
+    /// Line on which every diagnostic in the range starts.
+    line_number: OneIndexed,
+    /// Half-open index range into the flat diagnostics vector of [`SortedDiagnostics`].
+    diagnostic_index_range: Range<usize>,
+}
+
+/// Iterator to group sorted diagnostics by line.
+///
+/// Created by [`SortedDiagnostics::iter_lines`].
+pub(crate) struct LineDiagnosticsIterator<'a, T> {
+    /// The full, sorted diagnostics slice that the line ranges index into.
+    diagnostics: &'a [T],
+    /// Remaining per-line ranges still to be yielded.
+    inner: std::slice::Iter<'a, LineDiagnosticRange>,
+}
+
+impl<'a, T> Iterator for LineDiagnosticsIterator<'a, T>
+where
+    T: Ranged + Clone,
+{
+    type Item = LineDiagnostics<'a, T>;
+
+    /// Yield the diagnostics of the next line, or `None` once every line has been visited.
+    fn next(&mut self) -> Option<Self::Item> {
+        // Copy the slice reference out first so the yielded item borrows for `'a` rather than
+        // for the duration of this `&mut self` call.
+        let all_diagnostics = self.diagnostics;
+        self.inner.next().map(|line_range| LineDiagnostics {
+            line_number: line_range.line_number,
+            diagnostics: &all_diagnostics[line_range.diagnostic_index_range.clone()],
+        })
+    }
+}
+
+// `next` only forwards to the wrapped `std::slice::Iter`, which is itself a fused iterator.
+impl<T> std::iter::FusedIterator for LineDiagnosticsIterator<'_, T> where T: Clone + Ranged {}
+
+/// All diagnostics that start on a single line of source code in one embedded Python file.
+///
+/// Dereferences to the slice of diagnostics, so slice methods can be called on it directly.
+#[derive(Debug)]
+pub(crate) struct LineDiagnostics<'a, T> {
+    /// Line number on which these diagnostics start.
+    pub(crate) line_number: OneIndexed,
+
+    /// Diagnostics starting on this line.
+    pub(crate) diagnostics: &'a [T],
+}
+
+/// Lets a [`LineDiagnostics`] be used wherever a `&[T]` of its diagnostics is expected.
+impl<T> Deref for LineDiagnostics<'_, T> {
+    type Target = [T];
+
+    /// Return the diagnostics that start on this line.
+    fn deref(&self) -> &Self::Target {
+        self.diagnostics
+    }
+}
+
+/// A diagnostic paired with the line number it starts on; the sort key used while building
+/// [`SortedDiagnostics`].
+#[derive(Debug)]
+struct DiagnosticWithLine<T> {
+    /// Line on which `diagnostic` starts.
+    line_number: OneIndexed,
+    /// The diagnostic itself.
+    diagnostic: T,
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::db::Db;
+    use ruff_db::files::system_path_to_file;
+    use ruff_db::source::line_index;
+    use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
+    use ruff_source_file::OneIndexed;
+    use ruff_text_size::{TextRange, TextSize};
+
+    /// Ranges supplied out of line order are grouped per line, in ascending line order.
+    #[test]
+    fn sort_and_group() {
+        let mut db = Db::setup(SystemPathBuf::from("/src"));
+        // Two lines: offsets 0..=3 are on the first line, offsets from 4 on the second.
+        db.write_file("/src/test.py", "one\ntwo\n").unwrap();
+        let file = system_path_to_file(&db, "/src/test.py").unwrap();
+        let lines = line_index(&db, file);
+
+        // Start offsets 0 and 1 fall on the first line, 5 on the second; the second-line range
+        // is deliberately placed between the two first-line ranges.
+        let ranges = vec![
+            TextRange::new(TextSize::new(0), TextSize::new(1)),
+            TextRange::new(TextSize::new(5), TextSize::new(10)),
+            TextRange::new(TextSize::new(1), TextSize::new(7)),
+        ];
+
+        let sorted = super::SortedDiagnostics::new(&ranges, &lines);
+        let grouped = sorted.iter_lines().collect::<Vec<_>>();
+
+        let [line1, line2] = &grouped[..] else {
+            panic!("expected two lines");
+        };
+
+        assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(0));
+        assert_eq!(line1.diagnostics.len(), 2);
+        assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(1));
+        assert_eq!(line2.diagnostics.len(), 1);
+    }
+}
--- a/crates/red_knot_test/src/lib.rs
+++ b/crates/red_knot_test/src/lib.rs
@ -0,0 +1,91 @@
+use parser as test_parser;
+use red_knot_python_semantic::types::check_types;
+use ruff_db::files::system_path_to_file;
+use ruff_db::parsed::parsed_module;
+use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
+use std::collections::BTreeMap;
+use std::path::PathBuf;
+
+type Failures = BTreeMap<SystemPathBuf, matcher::FailuresByLine>;
+
+mod assertion;
+mod db;
+mod diagnostic;
+mod matcher;
+mod parser;
+
+/// Run `path` as a markdown test suite with given `title`.
+///
+/// For every failing test, the test name is printed, followed by its failures grouped by file
+/// path and line.
+///
+/// # Panics
+///
+/// Panics if `path` cannot be read, if its contents cannot be parsed as a test suite, or if any
+/// test in the suite fails.
+#[allow(clippy::print_stdout)]
+pub fn run(path: &PathBuf, title: &str) {
+    // `Path::display` is used in both messages because it never panics, even for paths that are
+    // not valid UTF-8; a panic while formatting the message would hide the real error.
+    let source = std::fs::read_to_string(path)
+        .unwrap_or_else(|err| panic!("Error reading `{}`: {err}", path.display()));
+    let suite = match test_parser::parse(title, &source) {
+        Ok(suite) => suite,
+        Err(err) => {
+            panic!("Error parsing `{}`: {err}", path.display())
+        }
+    };
+
+    // Run every test before failing, so that one run reports all failures in the suite.
+    let mut any_failures = false;
+    for test in suite.tests() {
+        if let Err(failures) = run_test(&test) {
+            any_failures = true;
+            println!("{}", test.name());
+
+            for (path, by_line) in failures {
+                println!("  {path}");
+                for (line, failures) in by_line.iter() {
+                    for failure in failures {
+                        println!("    line {line}: {failure}");
+                    }
+                }
+                println!();
+            }
+        }
+    }
+
+    assert!(!any_failures, "Some tests failed.");
+}
+
+/// Run a single markdown test and collect its failures, keyed by the path of the embedded file.
+///
+/// All embedded files are written under `/src` in a fresh database before any of them is
+/// checked. Panics if an embedded file is not `py`/`pyi`, or if it has Python syntax errors.
+fn run_test(test: &parser::MarkdownTest) -> Result<(), Failures> {
+    let root = SystemPathBuf::from("/src");
+    let mut db = db::Db::setup(root.clone());
+
+    // First pass: write every embedded file, remembering where each one went.
+    let mut written = Vec::new();
+    for embedded in test.files() {
+        assert!(
+            matches!(embedded.lang, "py" | "pyi"),
+            "Non-Python files not supported yet."
+        );
+        let target = root.join(embedded.path);
+        db.write_file(&target, embedded.code).unwrap();
+        written.push(target);
+    }
+
+    // Second pass: type-check each file and match its diagnostics against its assertions.
+    let mut failures = Failures::default();
+    for path in written {
+        let file = system_path_to_file(&db, path.clone()).unwrap();
+        let parsed = parsed_module(&db, file);
+
+        // TODO allow testing against code with syntax errors
+        assert!(
+            parsed.errors().is_empty(),
+            "Python syntax errors in {}, {:?}: {:?}",
+            test.name(),
+            path,
+            parsed.errors()
+        );
+
+        if let Err(line_failures) = matcher::match_file(&db, file, check_types(&db, file)) {
+            failures.insert(path, line_failures);
+        }
+    }
+
+    if failures.is_empty() {
+        return Ok(());
+    }
+    Err(failures)
+}
--- a/crates/red_knot_test/src/matcher.rs
+++ b/crates/red_knot_test/src/matcher.rs
@ -0,0 +1,789 @@
+//! Match [`TypeCheckDiagnostic`]s against [`Assertion`]s and produce test failure messages for any
+//! mismatches.
+use crate::assertion::{Assertion, InlineFileAssertions};
+use crate::db::Db;
+use crate::diagnostic::SortedDiagnostics;
+use red_knot_python_semantic::types::TypeCheckDiagnostic;
+use ruff_db::files::File;
+use ruff_db::source::{line_index, source_text, SourceText};
+use ruff_source_file::{LineIndex, OneIndexed};
+use ruff_text_size::Ranged;
+use std::cmp::Ordering;
+use std::ops::Range;
+use std::sync::Arc;
+
+/// Failure messages for one file, grouped by the line they belong to.
+///
+/// Messages live in one flat vector; each [`LineFailures`] entry records which slice of that
+/// vector belongs to a given line.
+#[derive(Debug, Default)]
+pub(super) struct FailuresByLine {
+    /// All failure messages, in the order they were pushed.
+    failures: Vec<String>,
+    /// One entry per `push` call, indexing into `failures`.
+    lines: Vec<LineFailures>,
+}
+
+impl FailuresByLine {
+    /// Iterate over `(line number, messages)` pairs in the order they were pushed.
+    pub(super) fn iter(&self) -> impl Iterator<Item = (OneIndexed, &[String])> {
+        self.lines
+            .iter()
+            .map(|LineFailures { line_number, range }| {
+                (*line_number, &self.failures[range.clone()])
+            })
+    }
+
+    /// Record `messages` as the failures for `line_number`.
+    fn push(&mut self, line_number: OneIndexed, messages: Vec<String>) {
+        // The new messages occupy the tail of the flat vector once appended.
+        let first = self.failures.len();
+        let range = first..first + messages.len();
+        self.failures.extend(messages);
+        self.lines.push(LineFailures { line_number, range });
+    }
+
+    /// `true` if nothing has been pushed yet.
+    fn is_empty(&self) -> bool {
+        self.lines.is_empty()
+    }
+}
+
+/// Locates the failure messages of one line inside [`FailuresByLine`].
+#[derive(Debug)]
+struct LineFailures {
+    /// Line the messages belong to.
+    line_number: OneIndexed,
+    /// Half-open index range into the flat message vector of [`FailuresByLine`].
+    range: Range<usize>,
+}
+
+/// Match the given `diagnostics` for `file` against the assertions parsed from the file.
+///
+/// Assertions and diagnostics are both walked in ascending line order. Lines that have both are
+/// matched against each other; lines that have only assertions or only diagnostics are reported
+/// as entirely unmatched.
+///
+/// Returns `Ok(())` when nothing is left unmatched, otherwise the failure messages by line.
+pub(super) fn match_file<T>(
+    db: &Db,
+    file: File,
+    diagnostics: impl IntoIterator<Item = T>,
+) -> Result<(), FailuresByLine>
+where
+    T: Diagnostic + Clone,
+{
+    // Parse assertions from comments in the file, and get diagnostics from the file; both
+    // ordered by line number.
+    let assertions = InlineFileAssertions::from_file(db, file);
+    let diagnostics = SortedDiagnostics::new(diagnostics, &line_index(db, file));
+
+    // Get iterators over assertions and diagnostics grouped by line, in ascending line order.
+    let mut line_assertions = assertions.into_iter();
+    let mut line_diagnostics = diagnostics.iter_lines();
+
+    // The "current" heads of both iterators; `None` means that iterator is exhausted.
+    let mut current_assertions = line_assertions.next();
+    let mut current_diagnostics = line_diagnostics.next();
+
+    let matcher = Matcher::from_file(db, file);
+    let mut failures = FailuresByLine::default();
+
+    loop {
+        match (&current_assertions, &current_diagnostics) {
+            (Some(assertions), Some(diagnostics)) => {
+                match assertions.line_number.cmp(&diagnostics.line_number) {
+                    Ordering::Equal => {
+                        // We have assertions and diagnostics on the same line; check for
+                        // matches and error on any that don't match, then advance both
+                        // iterators.
+                        matcher
+                            .match_line(diagnostics, assertions)
+                            .unwrap_or_else(|messages| {
+                                failures.push(assertions.line_number, messages);
+                            });
+                        current_assertions = line_assertions.next();
+                        current_diagnostics = line_diagnostics.next();
+                    }
+                    Ordering::Less => {
+                        // We have assertions on an earlier line than diagnostics; report these
+                        // assertions as all unmatched, and advance the assertions iterator.
+                        failures.push(assertions.line_number, unmatched(assertions));
+                        current_assertions = line_assertions.next();
+                    }
+                    Ordering::Greater => {
+                        // We have diagnostics on an earlier line than assertions; report these
+                        // diagnostics as all unmatched, and advance the diagnostics iterator.
+                        failures.push(diagnostics.line_number, unmatched(diagnostics));
+                        current_diagnostics = line_diagnostics.next();
+                    }
+                }
+            }
+            (Some(assertions), None) => {
+                // We've exhausted diagnostics but still have assertions; report these assertions
+                // as unmatched and advance the assertions iterator.
+                failures.push(assertions.line_number, unmatched(assertions));
+                current_assertions = line_assertions.next();
+            }
+            (None, Some(diagnostics)) => {
+                // We've exhausted assertions but still have diagnostics; report these
+                // diagnostics as unmatched and advance the diagnostics iterator.
+                failures.push(diagnostics.line_number, unmatched(diagnostics));
+                current_diagnostics = line_diagnostics.next();
+            }
+            // When we've exhausted both diagnostics and assertions, break.
+            (None, None) => break,
+        }
+    }
+
+    if failures.is_empty() {
+        Ok(())
+    } else {
+        Err(failures)
+    }
+}
+
+/// The view of a diagnostic that the matcher needs: a source range (via [`Ranged`]), a rule
+/// name and a message.
+pub(super) trait Diagnostic: Ranged {
+    /// Name of the rule that produced this diagnostic.
+    fn rule(&self) -> &str;
+
+    /// Message text of this diagnostic.
+    fn message(&self) -> &str;
+}
+
+/// Forwards to the inherent `rule` and `message` methods of the wrapped [`TypeCheckDiagnostic`].
+impl Diagnostic for Arc<TypeCheckDiagnostic> {
+    fn rule(&self) -> &str {
+        // `as_ref()` selects the inherent method on `TypeCheckDiagnostic`.
+        self.as_ref().rule()
+    }
+
+    fn message(&self) -> &str {
+        self.as_ref().message()
+    }
+}
+
+/// Something (a diagnostic or an assertion) that can describe itself in a failure message when
+/// it was left unmatched.
+trait Unmatched {
+    /// Build the failure message for this unmatched item.
+    fn unmatched(&self) -> String;
+}
+
+/// Every [`Diagnostic`] that no assertion matched is reported as an unexpected error.
+impl<T> Unmatched for T
+where
+    T: Diagnostic,
+{
+    /// Format as `unexpected error: [<rule>] "<message>"`.
+    fn unmatched(&self) -> String {
+        format!(
+            r#"unexpected error: [{}] "{}""#,
+            self.rule(),
+            self.message()
+        )
+    }
+}
+
+/// An [`Assertion`] that matched no diagnostic is reported using its `Display` output.
+impl Unmatched for Assertion<'_> {
+    /// Format as `unmatched assertion: <assertion>`.
+    fn unmatched(&self) -> String {
+        format!("unmatched assertion: {self}")
+    }
+}
+
+/// Build the failure messages for a slice of items that are all unmatched, one per item and in
+/// slice order.
+fn unmatched<'a, T: Unmatched + 'a>(unmatched: &'a [T]) -> Vec<String> {
+    unmatched.iter().map(Unmatched::unmatched).collect()
+}
+
+/// Matches the diagnostics of one line against the assertions of that line.
+///
+/// Holds the line index and source text of the file, which are needed to compute the column of
+/// a diagnostic.
+struct Matcher {
+    /// Line index of the file being matched.
+    line_index: LineIndex,
+    /// Source text of the file being matched.
+    source: SourceText,
+}
+
+impl Matcher {
+    /// Build a matcher from the line index and source text of `file`.
+    fn from_file(db: &Db, file: File) -> Self {
+        let source = source_text(db, file);
+        let line_index = line_index(db, file);
+        Self { line_index, source }
+    }
+
+    /// Check a slice of [`Diagnostic`]s against a slice of [`Assertion`]s.
+    ///
+    /// On failure, the returned messages list the unmatched assertions first (in assertion
+    /// order), followed by the diagnostics that no assertion consumed.
+    fn match_line<'a, 'b, T: Diagnostic + 'a>(
+        &self,
+        diagnostics: &'a [T],
+        assertions: &'a [Assertion<'b>],
+    ) -> Result<(), Vec<String>>
+    where
+        'b: 'a,
+    {
+        // Diagnostics not yet claimed by an assertion; `matches` removes the ones it consumes.
+        let mut remaining: Vec<&T> = diagnostics.iter().collect();
+
+        let mut failures: Vec<String> = assertions
+            .iter()
+            .filter(|assertion| !self.matches(assertion, &mut remaining))
+            .map(|assertion| assertion.unmatched())
+            .collect();
+        failures.extend(
+            remaining
+                .into_iter()
+                .map(|diagnostic| diagnostic.unmatched()),
+        );
+
+        if failures.is_empty() {
+            return Ok(());
+        }
+        Err(failures)
+    }
+
+    /// Column at which `ranged` starts, according to this file's line index and source.
+    fn column<T: Ranged>(&self, ranged: &T) -> OneIndexed {
+        let location = self
+            .line_index
+            .source_location(ranged.start(), &self.source);
+        location.column
+    }
+
+    /// Check if `assertion` matches any [`Diagnostic`]s in `unmatched`.
+    ///
+    /// Returns `true` and removes the matched diagnostics from `unmatched` on a match; returns
+    /// `false` and leaves `unmatched` untouched otherwise.
+    ///
+    /// An `Error` assertion consumes at most one diagnostic: the first one that satisfies every
+    /// constraint the assertion specifies (rule, column, message substring).
+    ///
+    /// A `Revealed` assertion must match a revealed-type diagnostic, and additionally consumes
+    /// the first undefined-reveal diagnostic, if present.
+    fn matches<T: Diagnostic>(&self, assertion: &Assertion, unmatched: &mut Vec<&T>) -> bool {
+        match assertion {
+            Assertion::Error(error) => {
+                // A constraint the assertion leaves unspecified accepts any diagnostic.
+                let found = unmatched.iter().position(|diagnostic| {
+                    error.rule.map_or(true, |rule| rule == diagnostic.rule())
+                        && error
+                            .column
+                            .map_or(true, |col| col == self.column(*diagnostic))
+                        && error
+                            .message_contains
+                            .map_or(true, |needle| diagnostic.message().contains(needle))
+                });
+                match found {
+                    Some(index) => {
+                        unmatched.swap_remove(index);
+                        true
+                    }
+                    None => false,
+                }
+            }
+            Assertion::Revealed(expected_type) => {
+                let expected_message = format!("Revealed type is `{expected_type}`");
+
+                let revealed = unmatched.iter().position(|diagnostic| {
+                    diagnostic.rule() == "revealed-type" && diagnostic.message() == expected_message
+                });
+                let Some(revealed) = revealed else {
+                    return false;
+                };
+                let undefined = unmatched
+                    .iter()
+                    .position(|diagnostic| diagnostic.rule() == "undefined-reveal");
+
+                match undefined {
+                    Some(undefined) => {
+                        // The two indices always differ (the rules differ). Remove the larger
+                        // one first so the smaller one is still valid afterwards.
+                        unmatched.remove(revealed.max(undefined));
+                        unmatched.remove(revealed.min(undefined));
+                    }
+                    None => {
+                        unmatched.remove(revealed);
+                    }
+                }
+                true
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::FailuresByLine;
+    use ruff_db::files::system_path_to_file;
+    use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
+    use ruff_source_file::OneIndexed;
+    use ruff_text_size::{Ranged, TextRange};
+
+    /// Hand-built diagnostic with a fixed rule, message and range, so the matcher can be
+    /// tested without running the type checker.
+    #[derive(Clone, Debug)]
+    struct TestDiagnostic {
+        rule: &'static str,
+        message: &'static str,
+        range: TextRange,
+    }
+
+    impl TestDiagnostic {
+        /// Create a diagnostic whose range covers `offset..offset + 1`.
+        fn new(rule: &'static str, message: &'static str, offset: usize) -> Self {
+            let offset: u32 = offset.try_into().unwrap();
+            Self {
+                rule,
+                message,
+                range: TextRange::new(offset.into(), (offset + 1).into()),
+            }
+        }
+    }
+
+    impl super::Diagnostic for TestDiagnostic {
+        fn rule(&self) -> &str {
+            self.rule
+        }
+
+        fn message(&self) -> &str {
+            self.message
+        }
+    }
+
+    impl Ranged for TestDiagnostic {
+        fn range(&self) -> ruff_text_size::TextRange {
+            self.range
+        }
+    }
+
+    /// Write `source` to `/src/test.py` in a fresh database and match `diagnostics` against it.
+    fn get_result(source: &str, diagnostics: Vec<TestDiagnostic>) -> Result<(), FailuresByLine> {
+        let mut db = crate::db::Db::setup(SystemPathBuf::from("/src"));
+        db.write_file("/src/test.py", source).unwrap();
+        let file = system_path_to_file(&db, "/src/test.py").unwrap();
+
+        super::match_file(&db, file, diagnostics)
+    }
+
+    /// Assert that `result` is a failure with exactly the given messages.
+    ///
+    /// Each entry of `messages` is a zero-indexed line number and the failure messages expected
+    /// for that line, in order.
+    fn assert_fail(result: Result<(), FailuresByLine>, messages: &[(usize, &[&str])]) {
+        let Err(failures) = result else {
+            panic!("expected a failure");
+        };
+
+        let expected: Vec<(OneIndexed, Vec<String>)> = messages
+            .iter()
+            .map(|(idx, msgs)| {
+                (
+                    OneIndexed::from_zero_indexed(*idx),
+                    msgs.iter().map(ToString::to_string).collect(),
+                )
+            })
+            .collect();
+        let failures: Vec<(OneIndexed, Vec<String>)> = failures
+            .iter()
+            .map(|(idx, msgs)| (idx, msgs.to_vec()))
+            .collect();
+
+        assert_eq!(failures, expected);
+    }
+
+    /// Assert that `result` is a success, printing the failures otherwise.
+    fn assert_ok(result: &Result<(), FailuresByLine>) {
+        assert!(result.is_ok(), "{result:?}");
+    }
+
+    #[test]
+    fn type_match() {
+        let result = get_result(
+            "x # revealed: Foo",
+            vec![TestDiagnostic::new(
+                "revealed-type",
+                "Revealed type is `Foo`",
+                0,
+            )],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn type_wrong_rule() {
+        let result = get_result(
+            "x # revealed: Foo",
+            vec![TestDiagnostic::new(
+                "not-revealed-type",
+                "Revealed type is `Foo`",
+                0,
+            )],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    "unmatched assertion: revealed: Foo",
+                    r#"unexpected error: [not-revealed-type] "Revealed type is `Foo`""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn type_wrong_message() {
+        let result = get_result(
+            "x # revealed: Foo",
+            vec![TestDiagnostic::new("revealed-type", "Something else", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    "unmatched assertion: revealed: Foo",
+                    r#"unexpected error: [revealed-type] "Something else""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn type_unmatched() {
+        let result = get_result("x # revealed: Foo", vec![]);
+
+        assert_fail(result, &[(0, &["unmatched assertion: revealed: Foo"])]);
+    }
+
+    /// A `revealed` assertion also consumes an accompanying `undefined-reveal` diagnostic.
+    #[test]
+    fn type_match_with_undefined() {
+        let result = get_result(
+            "x # revealed: Foo",
+            vec![
+                TestDiagnostic::new("revealed-type", "Revealed type is `Foo`", 0),
+                TestDiagnostic::new("undefined-reveal", "Doesn't matter", 0),
+            ],
+        );
+
+        assert_ok(&result);
+    }
+
+    /// An `undefined-reveal` diagnostic alone does not satisfy a `revealed` assertion.
+    #[test]
+    fn type_match_with_only_undefined() {
+        let result = get_result(
+            "x # revealed: Foo",
+            vec![TestDiagnostic::new("undefined-reveal", "Doesn't matter", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    "unmatched assertion: revealed: Foo",
+                    r#"unexpected error: [undefined-reveal] "Doesn't matter""#,
+                ],
+            )],
+        );
+    }
+
+    /// A bare `error:` assertion matches any diagnostic on the line.
+    #[test]
+    fn error_match() {
+        let result = get_result(
+            "x # error:",
+            vec![TestDiagnostic::new("anything", "Any message", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_unmatched() {
+        let result = get_result("x # error:", vec![]);
+
+        assert_fail(result, &[(0, &["unmatched assertion: error:"])]);
+    }
+
+    #[test]
+    fn error_match_column() {
+        let result = get_result(
+            "x # error: 1",
+            vec![TestDiagnostic::new("anything", "Any message", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_wrong_column() {
+        let result = get_result(
+            "x # error: 2",
+            vec![TestDiagnostic::new("anything", "Any message", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    "unmatched assertion: error: 2",
+                    r#"unexpected error: [anything] "Any message""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn error_match_rule() {
+        let result = get_result(
+            "x # error: [some-rule]",
+            vec![TestDiagnostic::new("some-rule", "Any message", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_wrong_rule() {
+        let result = get_result(
+            "x # error: [some-rule]",
+            vec![TestDiagnostic::new("anything", "Any message", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    "unmatched assertion: error: [some-rule]",
+                    r#"unexpected error: [anything] "Any message""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn error_match_message() {
+        let result = get_result(
+            r#"x # error: "contains this""#,
+            vec![TestDiagnostic::new("anything", "message contains this", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_wrong_message() {
+        let result = get_result(
+            r#"x # error: "contains this""#,
+            vec![TestDiagnostic::new("anything", "Any message", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    r#"unmatched assertion: error: "contains this""#,
+                    r#"unexpected error: [anything] "Any message""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn error_match_column_and_rule() {
+        let result = get_result(
+            "x # error: 1 [some-rule]",
+            vec![TestDiagnostic::new("some-rule", "Any message", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_match_column_and_message() {
+        let result = get_result(
+            r#"x # error: 1 "contains this""#,
+            vec![TestDiagnostic::new("anything", "message contains this", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_match_rule_and_message() {
+        let result = get_result(
+            r#"x # error: [a-rule] "contains this""#,
+            vec![TestDiagnostic::new("a-rule", "message contains this", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_match_all() {
+        let result = get_result(
+            r#"x # error: 1 [a-rule] "contains this""#,
+            vec![TestDiagnostic::new("a-rule", "message contains this", 0)],
+        );
+
+        assert_ok(&result);
+    }
+
+    #[test]
+    fn error_match_all_wrong_column() {
+        let result = get_result(
+            r#"x # error: 2 [some-rule] "contains this""#,
+            vec![TestDiagnostic::new("some-rule", "message contains this", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    r#"unmatched assertion: error: 2 [some-rule] "contains this""#,
+                    r#"unexpected error: [some-rule] "message contains this""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn error_match_all_wrong_rule() {
+        let result = get_result(
+            r#"x # error: 1 [some-rule] "contains this""#,
+            vec![TestDiagnostic::new(
+                "other-rule",
+                "message contains this",
+                0,
+            )],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    r#"unmatched assertion: error: 1 [some-rule] "contains this""#,
+                    r#"unexpected error: [other-rule] "message contains this""#,
+                ],
+            )],
+        );
+    }
+
+    #[test]
+    fn error_match_all_wrong_message() {
+        let result = get_result(
+            r#"x # error: 1 [some-rule] "contains this""#,
+            vec![TestDiagnostic::new("some-rule", "Any message", 0)],
+        );
+
+        assert_fail(
+            result,
+            &[(
+                0,
+                &[
+                    r#"unmatched assertion: error: 1 [some-rule] "contains this""#,
+                    r#"unexpected error: [some-rule] "Any message""#,
+                ],
+            )],
+        );
+    }
+
+    /// Lines with only an assertion, only a diagnostic, or both, interleaved in one file.
+    #[test]
+    fn interspersed_matches_and_mismatches() {
+        // The raw string starts with a newline, so the line containing `1` is zero-indexed
+        // line 1, which is why the expected line numbers below equal the digits in the source.
+        let source = r#"
+            1 # error: [line-one]
+            2
+            3 # error: [line-three]
+            4 # error: [line-four]
+            5
+            6: # error: [line-six]
+            "#;
+        let two = source.find('2').unwrap();
+        let three = source.find('3').unwrap();
+        let five = source.find('5').unwrap();
+        let result = get_result(
+            source,
+            vec![
+                TestDiagnostic::new("line-two", "msg", two),
+                TestDiagnostic::new("line-three", "msg", three),
+                TestDiagnostic::new("line-five", "msg", five),
+            ],
+        );
+
+        assert_fail(
+            result,
+            &[
+                (1, &["unmatched assertion: error: [line-one]"]),
+                (2, &[r#"unexpected error: [line-two] "msg""#]),
+                (4, &["unmatched assertion: error: [line-four]"]),
+                (5, &[r#"unexpected error: [line-five] "msg""#]),
+                (6, &["unmatched assertion: error: [line-six]"]),
+            ],
+        );
+    }
+
+    /// Diagnostics remaining after the last assertion line are reported as unexpected.
+    #[test]
+    fn more_diagnostics_than_assertions() {
+        let source = r#"
+            1 # error: [line-one]
+            2
+            "#;
+        let one = source.find('1').unwrap();
+        let two = source.find('2').unwrap();
+        let result = get_result(
+            source,
+            vec![
+                TestDiagnostic::new("line-one", "msg", one),
+                TestDiagnostic::new("line-two", "msg", two),
+            ],
+        );
+
+        assert_fail(result, &[(2, &[r#"unexpected error: [line-two] "msg""#])]);
+    }
+
+    /// Two own-line assertion comments are both matched against the diagnostics on the `x` line.
+    #[test]
+    fn multiple_assertions_and_diagnostics_same_line() {
+        let source = "
+            # error: [one-rule]
+            # error: [other-rule]
+            x
+            ";
+        let x = source.find('x').unwrap();
+        let result = get_result(
+            source,
+            vec![
+                TestDiagnostic::new("one-rule", "msg", x),
+                TestDiagnostic::new("other-rule", "msg", x),
+            ],
+        );
+
+        assert_ok(&result);
+    }
+
+    /// Identical assertions each consume one of the identical diagnostics.
+    #[test]
+    fn multiple_assertions_and_diagnostics_same_line_all_same() {
+        let source = "
+            # error: [one-rule]
+            # error: [one-rule]
+            x
+            ";
+        let x = source.find('x').unwrap();
+        let result = get_result(
+            source,
+            vec![
+                TestDiagnostic::new("one-rule", "msg", x),
+                TestDiagnostic::new("one-rule", "msg", x),
+            ],
+        );
+
+        assert_ok(&result);
+    }
+
+    /// The surplus diagnostic is reported on the line of `x` (zero-indexed line 3).
+    #[test]
+    fn multiple_assertions_and_diagnostics_same_line_mismatch() {
+        let source = "
+            # error: [one-rule]
+            # error: [other-rule]
+            x
+            ";
+        let x = source.find('x').unwrap();
+        let result = get_result(
+            source,
+            vec![
+                TestDiagnostic::new("one-rule", "msg", x),
+                TestDiagnostic::new("other-rule", "msg", x),
+                TestDiagnostic::new("third-rule", "msg", x),
+            ],
+        );
+
+        assert_fail(result, &[(3, &[r#"unexpected error: [third-rule] "msg""#])]);
+    }
+
+    // NOTE(review): the `error: [undefined-reveal]` line below has no leading `#`, unlike the
+    // assertion comments in the other tests, so it is presumably not parsed as an assertion.
+    // The test can still pass because a `revealed` assertion may also consume an
+    // `undefined-reveal` diagnostic. Confirm whether `# error: [undefined-reveal]` was intended.
+    #[test]
+    fn parenthesized_expression() {
+        let source = "
+            a = b + (
+                error: [undefined-reveal]
+                reveal_type(5)  # revealed: Literal[5]
+            )
+            ";
+        let reveal = source.find("reveal_type").unwrap();
+        let result = get_result(
+            source,
+            vec![
+                TestDiagnostic::new("undefined-reveal", "msg", reveal),
+                TestDiagnostic::new("revealed-type", "Revealed type is `Literal[5]`", reveal),
+            ],
+        );
+
+        assert_ok(&result);
+    }
+}
--- a/crates/red_knot_test/src/parser.rs
+++ b/crates/red_knot_test/src/parser.rs
@ -0,0 +1,576 @@
+use once_cell::sync::Lazy;
+use regex::{Captures, Regex};
+use ruff_index::{newtype_index, IndexVec};
+use rustc_hash::{FxHashMap, FxHashSet};
+
+/// Parse the Markdown `source` into a test suite whose implicit root section is titled `title`.
+///
+/// Returns an error if the Markdown does not follow the structure the parser expects (for
+/// example a header nested inside a section that already contains code blocks).
+pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
+    Parser::new(title, source).parse()
+}
+
+/// A parsed markdown file containing tests.
+///
+/// Borrows from the source string and filepath it was created from.
+#[derive(Debug)]
+pub(crate) struct MarkdownTestSuite<'s> {
+    /// Header sections, in the order the parser created them. The implicit root section is
+    /// always created first.
+    sections: IndexVec<SectionId, Section<'s>>,
+
+    /// Test files embedded within the Markdown file, in the order their code blocks were parsed.
+    files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
+}
+
+impl<'s> MarkdownTestSuite<'s> {
+    /// Iterate over the tests of this suite, starting with the first embedded file.
+    pub(crate) fn tests(&self) -> MarkdownTestIterator<'_, 's> {
+        MarkdownTestIterator {
+            current_file_index: 0,
+            suite: self,
+        }
+    }
+}
+
+/// A single test inside a [`MarkdownTestSuite`].
+///
+/// A test is a single header section (or the implicit root section, if there are no Markdown
+/// headers in the file), containing one or more embedded Python files as fenced code blocks, and
+/// containing no nested header subsections.
+#[derive(Debug)]
+pub(crate) struct MarkdownTest<'m, 's> {
+    /// The suite this test belongs to; used to look up ancestor sections.
+    suite: &'m MarkdownTestSuite<'s>,
+    /// The section that directly contains this test's files.
+    section: &'m Section<'s>,
+    /// The embedded files making up this test.
+    files: &'m [EmbeddedFile<'s>],
+}
+
+impl<'m, 's> MarkdownTest<'m, 's> {
+    /// Build the display name of this test: the titles of all ancestor sections, outermost
+    /// first, followed by the title of the test's own section, separated by `" - "`.
+    pub(crate) fn name(&self) -> String {
+        let sections = &self.suite.sections;
+
+        // Walk from the direct parent up to the root, prepending each ancestor's title to the
+        // prefix collected so far.
+        let ancestor_ids =
+            std::iter::successors(self.section.parent_id, |&id| sections[id].parent_id);
+        let mut prefix = String::new();
+        for id in ancestor_ids {
+            let title = sections[id].title;
+            prefix = if prefix.is_empty() {
+                title.to_string()
+            } else {
+                format!("{title} - {prefix}")
+            };
+        }
+
+        if prefix.is_empty() {
+            self.section.title.to_string()
+        } else {
+            format!("{prefix} - {}", self.section.title)
+        }
+    }
+
+    /// Iterate over the embedded files that make up this test.
+    pub(crate) fn files(&self) -> impl Iterator<Item = &'m EmbeddedFile<'s>> {
+        self.files.iter()
+    }
+}
+
+/// Iterator yielding all [`MarkdownTest`]s in a [`MarkdownTestSuite`].
+#[derive(Debug)]
+pub(crate) struct MarkdownTestIterator<'m, 's> {
+    /// The suite being iterated.
+    suite: &'m MarkdownTestSuite<'s>,
+    /// Index into the suite's files of the first file of the next test to yield.
+    current_file_index: usize,
+}
+
+impl<'m, 's> Iterator for MarkdownTestIterator<'m, 's> {
+    type Item = MarkdownTest<'m, 's>;
+
+    /// Yield the next test: the run of consecutive files, starting at the current position,
+    /// that all belong to the same section. Returns `None` once every file has been consumed.
+    fn next(&mut self) -> Option<Self::Item> {
+        let suite = self.suite;
+        let start = self.current_file_index;
+        let section_id = suite.files.get(start.into())?.section;
+
+        // Advance `end` to the first file that belongs to a different section (or past the
+        // last file).
+        let mut end = start + 1;
+        while suite
+            .files
+            .get(end.into())
+            .is_some_and(|file| file.section == section_id)
+        {
+            end += 1;
+        }
+
+        let first = EmbeddedFileId::from_usize(start);
+        let past_last = EmbeddedFileId::from_usize(end);
+        self.current_file_index = end;
+
+        Some(MarkdownTest {
+            suite,
+            section: &suite.sections[section_id],
+            files: &suite.files[first..past_last],
+        })
+    }
+}
+
+/// Index of a [`Section`] within the `sections` of a [`MarkdownTestSuite`].
+#[newtype_index]
+struct SectionId;
+
+/// A single header section of a [`MarkdownTestSuite`], or the implicit root "section".
+///
+/// A header section is the part of a Markdown file beginning with a `#`-prefixed header line, and
+/// extending until the next header line at the same or higher outline level (that is, with the
+/// same number or fewer `#` characters).
+///
+/// A header section may either contain one or more embedded Python files (making it a
+/// [`MarkdownTest`]), or it may contain nested sections (headers with more `#` characters), but
+/// not both.
+#[derive(Debug)]
+struct Section<'s> {
+    /// The header text, or the title of the whole suite for the implicit root section.
+    title: &'s str,
+    /// Number of `#` characters in the header; `0` for the implicit root section.
+    level: u8,
+    /// The enclosing section; `None` for the implicit root section.
+    parent_id: Option<SectionId>,
+}
+
+/// Index of an [`EmbeddedFile`] within the `files` of a [`MarkdownTestSuite`].
+#[newtype_index]
+struct EmbeddedFileId;
+
+/// A single file embedded in a [`Section`] as a fenced code block.
+///
+/// Currently must be a Python file (`py` language) or type stub (`pyi`). In the future we plan to
+/// support other kinds of files as well (TOML configuration, typeshed VERSIONS, `pth` files...).
+///
+/// A Python embedded file makes its containing [`Section`] into a [`MarkdownTest`], and will be
+/// type-checked and searched for inline-comment assertions to match against the diagnostics from
+/// type checking.
+#[derive(Debug)]
+pub(crate) struct EmbeddedFile<'s> {
+    /// The section whose code block this file was parsed from.
+    section: SectionId,
+    /// Value of the `path=...` config item of the code block; `test.py` if none was given.
+    pub(crate) path: &'s str,
+    /// The language tag following the opening backticks of the code block.
+    pub(crate) lang: &'s str,
+    /// The text between the opening and closing fence lines.
+    pub(crate) code: &'s str,
+}
+
+/// Matches an arbitrary amount of whitespace (including newlines), followed by a sequence of `#`
+/// characters, followed by a title heading, followed by a newline.
+///
+/// The `title` group is matched lazily so that trailing whitespace on the header line (including
+/// the `\r` of a CRLF line ending) is consumed by the following `\s*` instead of becoming part of
+/// the section title.
+static HEADER_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"^(\s*\n)*(?<level>#+)\s+(?<title>.+?)\s*\n").unwrap());
+
+/// Matches a code block fenced by triple backticks, possibly with language and `key=val`
+/// configuration items following the opening backticks (in the "tag string" of the code block).
+///
+/// The closing fence may be followed either by a newline or directly by the end of the input, so
+/// that the last code block of a file without a trailing newline is not silently skipped.
+static CODE_RE: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(r"^```(?<lang>\w+)(?<config>( +\S+)*)\s*\n(?<code>(.|\n)*?)\n```\s*(?:\n|\z)")
+        .unwrap()
+});
+
+/// Stack of the sections enclosing the current parse position, innermost last.
+///
+/// The bottom entry is always the implicit root section, which must never be popped.
+#[derive(Debug)]
+struct SectionStack(Vec<SectionId>);
+
+impl SectionStack {
+    /// Create a stack containing only the implicit root section.
+    fn new(root_section_id: SectionId) -> Self {
+        Self(vec![root_section_id])
+    }
+
+    /// Enter `section_id`, making it the innermost section.
+    fn push(&mut self, section_id: SectionId) {
+        self.0.push(section_id);
+    }
+
+    /// Leave the innermost section and return its id.
+    ///
+    /// Debug builds assert that something was popped and that the stack is not empty
+    /// afterwards, i.e. that the implicit root section is still on it.
+    fn pop(&mut self) -> Option<SectionId> {
+        let popped = self.0.pop();
+        debug_assert_ne!(popped, None, "Should never pop the implicit root section");
+        debug_assert!(
+            !self.0.is_empty(),
+            "Should never pop the implicit root section"
+        );
+        popped
+    }
+
+    /// Return the innermost section, i.e. the parent of whatever is parsed next.
+    ///
+    /// Takes `&self` because inspecting the top of the stack does not modify it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the stack is empty, which means the implicit root section was popped.
+    fn parent(&self) -> SectionId {
+        *self
+            .0
+            .last()
+            .expect("Should never pop the implicit root section")
+    }
+}
+
+/// Parse the source of a Markdown file into a [`MarkdownTestSuite`].
+///
+/// The parser consumes `unparsed` from the front, recording every header as a [`Section`] and
+/// every fenced code block as an [`EmbeddedFile`].
+#[derive(Debug)]
+struct Parser<'s> {
+    /// [`Section`]s of the final [`MarkdownTestSuite`].
+    sections: IndexVec<SectionId, Section<'s>>,
+
+    /// [`EmbeddedFile`]s of the final [`MarkdownTestSuite`].
+    files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
+
+    /// The unparsed remainder of the Markdown source.
+    unparsed: &'s str,
+
+    /// Stack of ancestor sections.
+    stack: SectionStack,
+
+    /// Names of embedded files in current active section.
+    ///
+    /// `None` until the first code block of the section has been parsed.
+    current_section_files: Option<FxHashSet<&'s str>>,
+}
+
+impl<'s> Parser<'s> {
+    /// Create a parser over `source`. The implicit root section (level 0, no parent) is
+    /// registered immediately and titled `title`.
+    fn new(title: &'s str, source: &'s str) -> Self {
+        let root = Section {
+            title,
+            level: 0,
+            parent_id: None,
+        };
+        let mut sections = IndexVec::default();
+        let root_section_id = sections.push(root);
+
+        Self {
+            unparsed: source,
+            stack: SectionStack::new(root_section_id),
+            current_section_files: None,
+            files: IndexVec::default(),
+            sections,
+        }
+    }
+
+    /// Run the parser to completion and turn it into the finished suite, or return the first
+    /// parse error.
+    fn parse(mut self) -> anyhow::Result<MarkdownTestSuite<'s>> {
+        self.parse_impl().map(|()| self.finish())
+    }
+
+    /// Release spare capacity of the collected sections and files and hand them over as a
+    /// [`MarkdownTestSuite`].
+    fn finish(self) -> MarkdownTestSuite<'s> {
+        let Self {
+            mut sections,
+            mut files,
+            ..
+        } = self;
+
+        sections.shrink_to_fit();
+        files.shrink_to_fit();
+
+        MarkdownTestSuite { sections, files }
+    }
+
+    /// Consume the unparsed input, dispatching headers and fenced code blocks to their handlers
+    /// and skipping every other line.
+    fn parse_impl(&mut self) -> anyhow::Result<()> {
+        while !self.unparsed.is_empty() {
+            if let Some(header) = self.scan(&HEADER_RE) {
+                self.parse_header(&header)?;
+                continue;
+            }
+            if let Some(code_block) = self.scan(&CODE_RE) {
+                self.parse_code_block(&code_block)?;
+                continue;
+            }
+
+            // Ignore other Markdown syntax (paragraphs, etc) used as comments in the test: drop
+            // the current line. If no newline is left, there is nothing more to parse.
+            let Some((_, rest)) = self.unparsed.split_once('\n') else {
+                break;
+            };
+            self.unparsed = rest;
+        }
+
+        Ok(())
+    }
+
+    /// Handle a matched header: close every open section at the same or a deeper level, then
+    /// open a new section below the remaining innermost one.
+    ///
+    /// Fails if the header has too many `#` characters for the level to fit in a `u8`, or if the
+    /// section that would become the parent already contains code blocks.
+    fn parse_header(&mut self, captures: &Captures<'s>) -> anyhow::Result<()> {
+        let header_level = captures["level"].len();
+        self.pop_sections_to_level(header_level);
+
+        let parent = self.stack.parent();
+
+        // HEADER_RE can't match without a match for group 'title'.
+        let title: &'s str = captures.name("title").unwrap().as_str();
+        let level: u8 = header_level.try_into()?;
+
+        if self.current_section_files.is_some() {
+            anyhow::bail!(
+                "Header '{}' not valid inside a test case; parent '{}' has code files.",
+                title,
+                self.sections[parent].title,
+            );
+        }
+
+        let section_id = self.sections.push(Section {
+            title,
+            level,
+            parent_id: Some(parent),
+        });
+        self.stack.push(section_id);
+
+        self.current_section_files = None;
+
+        Ok(())
+    }
+
+    /// Handle a matched fenced code block: parse its `key=val` config items, record it as an
+    /// [`EmbeddedFile`] of the innermost section, and reject a path that was already used by
+    /// another file of the same section.
+    fn parse_code_block(&mut self, captures: &Captures<'s>) -> anyhow::Result<()> {
+        // We never pop the implicit root section.
+        let parent = self.stack.parent();
+
+        let mut config: FxHashMap<&'s str, &'s str> = FxHashMap::default();
+
+        let config_items = captures.name("config").map_or("", |m| m.as_str());
+        for item in config_items.split_whitespace() {
+            // An item must contain exactly one `=`.
+            let (key, val) = match item.split_once('=') {
+                Some((key, val)) if !val.contains('=') => (key, val),
+                _ => anyhow::bail!("Invalid config item `{}`.", item),
+            };
+            if config.insert(key, val).is_some() {
+                anyhow::bail!("Duplicate config item `{}`.", item);
+            }
+        }
+
+        let path = config.get("path").copied().unwrap_or("test.py");
+
+        // CODE_RE can't match without matches for 'lang' and 'code'.
+        let lang: &'s str = captures.name("lang").unwrap().as_str();
+        let code: &'s str = captures.name("code").unwrap().as_str();
+        self.files.push(EmbeddedFile {
+            section: parent,
+            path,
+            lang,
+            code,
+        });
+
+        // The first file of a section creates the set of used paths; `insert` reports whether
+        // the path was not present yet.
+        let is_new_path = self
+            .current_section_files
+            .get_or_insert_with(FxHashSet::default)
+            .insert(path);
+        if is_new_path {
+            return Ok(());
+        }
+
+        let test_title = self.sections[parent].title;
+        if path == "test.py" {
+            anyhow::bail!(
+                "Test `{}` has duplicate files named `{path}`. \
+                 (This is the default filename; \
+                 consider giving some files an explicit name with `path=...`.)",
+                test_title
+            );
+        }
+        anyhow::bail!(
+            "Test `{}` has duplicate files named `{path}`.",
+            test_title
+        )
+    }
+
+    /// Close every open section whose level is `level` or deeper, so that the innermost
+    /// remaining section has a level strictly smaller than `level`.
+    fn pop_sections_to_level(&mut self, level: usize) {
+        loop {
+            let innermost_level = usize::from(self.sections[self.stack.parent()].level);
+            if level > innermost_level {
+                break;
+            }
+
+            self.stack.pop();
+            // We would have errored before pushing a child section if there were files, so we know
+            // no parent section can have files.
+            self.current_section_files = None;
+        }
+    }
+
+    /// Try `pattern` against the unparsed text. On a match, move the cursor past the matched
+    /// text and return the capture groups; otherwise leave the cursor alone and return `None`.
+    fn scan(&mut self, pattern: &Regex) -> Option<Captures<'s>> {
+        let captures = pattern.captures(self.unparsed)?;
+        // Group 0 is the whole match; everything up to its end has now been consumed.
+        let consumed = captures.get(0).unwrap().end();
+        self.unparsed = &self.unparsed[consumed..];
+        Some(captures)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ruff_python_trivia::textwrap::dedent;
+
+    /// Dedent `source`, parse it as `file.md`, require that parsing fails, and return the error
+    /// message.
+    fn parse_error(source: &str) -> String {
+        super::parse("file.md", &dedent(source))
+            .expect_err("Should fail to parse")
+            .to_string()
+    }
+
+    /// An empty document contains no tests.
+    #[test]
+    fn empty() {
+        let suite = super::parse("file.md", "").unwrap();
+
+        assert!(suite.tests().next().is_none());
+    }
+
+    /// A lone code block without any header forms one test named after the file.
+    #[test]
+    fn single_file_test() {
+        let source = dedent(
+            "
+            ```py
+            x = 1
+            ```
+            ",
+        );
+        let suite = super::parse("file.md", &source).unwrap();
+
+        let [test] = &suite.tests().collect::<Vec<_>>()[..] else {
+            panic!("expected one test");
+        };
+
+        assert_eq!(test.name(), "file.md");
+
+        let [file] = test.files().collect::<Vec<_>>()[..] else {
+            panic!("expected one file");
+        };
+
+        assert_eq!(file.path, "test.py");
+        assert_eq!(file.lang, "py");
+        assert_eq!(file.code, "x = 1");
+    }
+
+    /// Two sibling headers, each with one code block, form two separate tests.
+    #[test]
+    fn multiple_tests() {
+        let source = dedent(
+            "
+            # One
+
+            ```py
+            x = 1
+            ```
+
+            # Two
+
+            ```py
+            y = 2
+            ```
+            ",
+        );
+        let suite = super::parse("file.md", &source).unwrap();
+
+        let [first, second] = &suite.tests().collect::<Vec<_>>()[..] else {
+            panic!("expected two tests");
+        };
+
+        assert_eq!(first.name(), "file.md - One");
+        assert_eq!(second.name(), "file.md - Two");
+
+        let [file] = first.files().collect::<Vec<_>>()[..] else {
+            panic!("expected one file");
+        };
+
+        assert_eq!(file.path, "test.py");
+        assert_eq!(file.lang, "py");
+        assert_eq!(file.code, "x = 1");
+
+        let [file] = second.files().collect::<Vec<_>>()[..] else {
+            panic!("expected one file");
+        };
+
+        assert_eq!(file.path, "test.py");
+        assert_eq!(file.lang, "py");
+        assert_eq!(file.code, "y = 2");
+    }
+
+    /// A `path=...` config item overrides the default file name.
+    #[test]
+    fn custom_file_path() {
+        let source = dedent(
+            "
+            ```py path=foo.py
+            x = 1
+            ```
+            ",
+        );
+        let suite = super::parse("file.md", &source).unwrap();
+
+        let [test] = &suite.tests().collect::<Vec<_>>()[..] else {
+            panic!("expected one test");
+        };
+        let [file] = test.files().collect::<Vec<_>>()[..] else {
+            panic!("expected one file");
+        };
+
+        assert_eq!(file.path, "foo.py");
+        assert_eq!(file.lang, "py");
+        assert_eq!(file.code, "x = 1");
+    }
+
+    /// The code of a block spanning several lines keeps its inner newlines.
+    #[test]
+    fn multi_line_file() {
+        let source = dedent(
+            "
+            ```py
+            x = 1
+            y = 2
+            ```
+            ",
+        );
+        let suite = super::parse("file.md", &source).unwrap();
+
+        let [test] = &suite.tests().collect::<Vec<_>>()[..] else {
+            panic!("expected one test");
+        };
+        let [file] = test.files().collect::<Vec<_>>()[..] else {
+            panic!("expected one file");
+        };
+
+        assert_eq!(file.code, "x = 1\ny = 2");
+    }
+
+    /// A nested header below a section that already has code blocks is rejected.
+    #[test]
+    fn no_header_inside_test() {
+        let message = parse_error(
+            "
+            # One
+
+            ```py
+            x = 1
+            ```
+
+            ## Two
+            ",
+        );
+        assert_eq!(
+            message,
+            "Header 'Two' not valid inside a test case; parent 'One' has code files."
+        );
+    }
+
+    /// A config item without `=` is rejected.
+    #[test]
+    fn invalid_config_item_no_equals() {
+        let message = parse_error(
+            "
+            ```py foo
+            x = 1
+            ```
+            ",
+        );
+        assert_eq!(message, "Invalid config item `foo`.");
+    }
+
+    /// A config item with more than one `=` is rejected.
+    #[test]
+    fn invalid_config_item_too_many_equals() {
+        let message = parse_error(
+            "
+            ```py foo=bar=baz
+            x = 1
+            ```
+            ",
+        );
+        assert_eq!(message, "Invalid config item `foo=bar=baz`.");
+    }
+
+    /// Giving the same config key twice is rejected.
+    #[test]
+    fn invalid_config_item_duplicate() {
+        let message = parse_error(
+            "
+            ```py foo=bar foo=baz
+            x = 1
+            ```
+            ",
+        );
+        assert_eq!(message, "Duplicate config item `foo=baz`.");
+    }
+
+    /// Two code blocks in one test that both use the default file name are rejected, with a
+    /// hint about `path=...`.
+    #[test]
+    fn no_duplicate_name_files_in_test() {
+        let message = parse_error(
+            "
+            ```py
+            x = 1
+            ```
+
+            ```py
+            y = 2
+            ```
+            ",
+        );
+        assert_eq!(
+            message,
+            "Test `file.md` has duplicate files named `test.py`. \
+            (This is the default filename; consider giving some files an explicit name \
+            with `path=...`.)"
+        );
+    }
+
+    /// Two code blocks in one test with the same explicit path are rejected.
+    #[test]
+    fn no_duplicate_name_files_in_test_non_default() {
+        let message = parse_error(
+            "
+            ```py path=foo.py
+            x = 1
+            ```
+
+            ```py path=foo.py
+            y = 2
+            ```
+            ",
+        );
+        assert_eq!(
+            message,
+            "Test `file.md` has duplicate files named `foo.py`."
+        );
+    }
+}
--- a/crates/ruff_python_trivia/src/comment_ranges.rs
+++ b/crates/ruff_python_trivia/src/comment_ranges.rs
@ -194,7 +194,7 @@ impl CommentRanges {
    }

    /// Returns `true` if a comment is an own-line comment (as opposed to an end-of-line comment).
-    fn is_own_line(offset: TextSize, locator: &Locator) -> bool {
+    pub fn is_own_line(offset: TextSize, locator: &Locator) -> bool {
        let range = TextRange::new(locator.line_start(offset), offset);
        locator.slice(range).chars().all(is_python_whitespace)
    }
--- a/crates/ruff_text_size/src/traits.rs
+++ b/crates/ruff_text_size/src/traits.rs
@ -1,3 +1,4 @@
+use std::sync::Arc;
 use {crate::TextRange, crate::TextSize, std::convert::TryInto};

 use priv_in_pub::Sealed;
@ -66,3 +67,12 @@ where
        T::range(self)
    }
 }
+
+/// An [`Arc`] around a ranged value reports the range of the value it points to.
+impl<T: Ranged> Ranged for Arc<T> {
+    fn range(&self) -> TextRange {
+        // `**self` is the `T` behind the `Arc`; delegate to its implementation.
+        Ranged::range(&**self)
+    }
+}