Add mdtest support for files with invalid syntax (#14126)

2025-07-29 16:03:50 +00:00 · 2024-11-06 12:25:52 +01:00 · 2024-11-06 12:25:52 +01:00 · a56ee9268e
commit a56ee9268e
parent 4ece8e5c1e
9 changed files with 137 additions and 70 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -51,6 +51,10 @@ repos:
      - id: blacken-docs
        args: ["--pyi", "--line-length", "130"]
        files: '^crates/.*/resources/mdtest/.*\.md'
+        exclude: |
+          (?x)^(
+            .*?invalid(_.+)_syntax.md
+          )$
        additional_dependencies:
          - black==24.10.0

--- a/Cargo.lock
+++ b/Cargo.lock
@ -2173,6 +2173,7 @@ dependencies = [
 "regex",
 "ruff_db",
 "ruff_index",
+ "ruff_python_parser",
 "ruff_python_trivia",
 "ruff_source_file",
 "ruff_text_size",
--- a/crates/red_knot_python_semantic/resources/mdtest/exception/invalid_exception_syntax.md
+++ b/crates/red_knot_python_semantic/resources/mdtest/exception/invalid_exception_syntax.md
@ -0,0 +1,13 @@
+# Exception Handling
+
+## Invalid syntax
+
+```py
+from typing_extensions import reveal_type
+
+try:
+    print
+except as e:  # error: [invalid-syntax]
+    reveal_type(e)  # revealed: Unknown
+
+```
--- a/crates/red_knot_python_semantic/src/types/diagnostic.rs
+++ b/crates/red_knot_python_semantic/src/types/diagnostic.rs
@ -8,7 +8,7 @@ use std::sync::Arc;
 use crate::types::{ClassLiteralType, Type};
 use crate::Db;

-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Eq, PartialEq, Clone)]
 pub struct TypeCheckDiagnostic {
    // TODO: Don't use string keys for rules
    pub(super) rule: String,
--- a/crates/red_knot_python_semantic/src/types/infer.rs
+++ b/crates/red_knot_python_semantic/src/types/infer.rs
@ -5061,27 +5061,6 @@ mod tests {
        Ok(())
    }

-    #[test]
-    fn exception_handler_with_invalid_syntax() -> anyhow::Result<()> {
-        let mut db = setup_db();
-
-        db.write_dedented(
-            "src/a.py",
-            "
-            from typing_extensions import reveal_type
-
-            try:
-                print
-            except as e:
-                reveal_type(e)
-            ",
-        )?;
-
-        assert_file_diagnostics(&db, "src/a.py", &["Revealed type is `Unknown`"]);
-
-        Ok(())
-    }
-
    #[test]
    fn basic_comprehension() -> anyhow::Result<()> {
        let mut db = setup_db();
@ -5424,7 +5403,7 @@ mod tests {
                    return 42

            class Iterable:
-                def __iter__(self) -> Iterator:
+                def __iter__(self) -> Iterator: ...

            x = [*NotIterable()]
            y = [*Iterable()]
--- a/crates/red_knot_test/Cargo.toml
+++ b/crates/red_knot_test/Cargo.toml
@ -15,6 +15,7 @@ red_knot_python_semantic = { workspace = true }
 red_knot_vendored = { workspace = true }
 ruff_db = { workspace = true }
 ruff_index = { workspace = true }
+ruff_python_parser = { workspace = true }
 ruff_python_trivia = { workspace = true }
 ruff_source_file = { workspace = true }
 ruff_text_size = { workspace = true }
--- a/crates/red_knot_test/src/diagnostic.rs
+++ b/crates/red_knot_test/src/diagnostic.rs
@ -2,10 +2,63 @@
 //!
 //! We don't assume that we will get the diagnostics in source order.

+use red_knot_python_semantic::types::TypeCheckDiagnostic;
+use ruff_python_parser::ParseError;
 use ruff_source_file::{LineIndex, OneIndexed};
-use ruff_text_size::Ranged;
+use ruff_text_size::{Ranged, TextRange};
+use std::borrow::Cow;
 use std::ops::{Deref, Range};

+pub(super) trait Diagnostic: std::fmt::Debug {
+    fn rule(&self) -> &str;
+
+    fn message(&self) -> Cow<str>;
+
+    fn range(&self) -> TextRange;
+}
+
+impl Diagnostic for TypeCheckDiagnostic {
+    fn rule(&self) -> &str {
+        TypeCheckDiagnostic::rule(self)
+    }
+
+    fn message(&self) -> Cow<str> {
+        TypeCheckDiagnostic::message(self).into()
+    }
+
+    fn range(&self) -> TextRange {
+        Ranged::range(self)
+    }
+}
+
+impl Diagnostic for ParseError {
+    fn rule(&self) -> &str {
+        "invalid-syntax"
+    }
+
+    fn message(&self) -> Cow<str> {
+        self.error.to_string().into()
+    }
+
+    fn range(&self) -> TextRange {
+        self.location
+    }
+}
+
+impl Diagnostic for Box<dyn Diagnostic> {
+    fn rule(&self) -> &str {
+        (**self).rule()
+    }
+
+    fn message(&self) -> Cow<str> {
+        (**self).message()
+    }
+
+    fn range(&self) -> TextRange {
+        (**self).range()
+    }
+}
+
 /// All diagnostics for one embedded Python file, sorted and grouped by start line number.
 ///
 /// The diagnostics are kept in a flat vector, sorted by line number. A separate vector of
@ -19,13 +72,13 @@ pub(crate) struct SortedDiagnostics<T> {

 impl<T> SortedDiagnostics<T>
 where
-    T: Ranged + Clone,
+    T: Diagnostic,
 {
    pub(crate) fn new(diagnostics: impl IntoIterator<Item = T>, line_index: &LineIndex) -> Self {
        let mut diagnostics: Vec<_> = diagnostics
            .into_iter()
            .map(|diagnostic| DiagnosticWithLine {
-                line_number: line_index.line_index(diagnostic.start()),
+                line_number: line_index.line_index(diagnostic.range().start()),
                diagnostic,
            })
            .collect();
@ -94,7 +147,7 @@ pub(crate) struct LineDiagnosticsIterator<'a, T> {

 impl<'a, T> Iterator for LineDiagnosticsIterator<'a, T>
 where
-    T: Ranged + Clone,
+    T: Diagnostic,
 {
    type Item = LineDiagnostics<'a, T>;

@ -110,7 +163,7 @@ where
    }
 }

-impl<T> std::iter::FusedIterator for LineDiagnosticsIterator<'_, T> where T: Clone + Ranged {}
+impl<T> std::iter::FusedIterator for LineDiagnosticsIterator<'_, T> where T: Diagnostic {}

 /// All diagnostics that start on a single line of source code in one embedded Python file.
 #[derive(Debug)]
@ -139,11 +192,13 @@ struct DiagnosticWithLine<T> {
 #[cfg(test)]
 mod tests {
    use crate::db::Db;
+    use crate::diagnostic::Diagnostic;
    use ruff_db::files::system_path_to_file;
    use ruff_db::source::line_index;
    use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
    use ruff_source_file::OneIndexed;
    use ruff_text_size::{TextRange, TextSize};
+    use std::borrow::Cow;

    #[test]
    fn sort_and_group() {
@ -152,13 +207,18 @@ mod tests {
        let file = system_path_to_file(&db, "/src/test.py").unwrap();
        let lines = line_index(&db, file);

-        let ranges = vec![
+        let ranges = [
            TextRange::new(TextSize::new(0), TextSize::new(1)),
            TextRange::new(TextSize::new(5), TextSize::new(10)),
            TextRange::new(TextSize::new(1), TextSize::new(7)),
        ];

-        let sorted = super::SortedDiagnostics::new(&ranges, &lines);
+        let diagnostics: Vec<_> = ranges
+            .into_iter()
+            .map(|range| DummyDiagnostic { range })
+            .collect();
+
+        let sorted = super::SortedDiagnostics::new(diagnostics, &lines);
        let grouped = sorted.iter_lines().collect::<Vec<_>>();

        let [line1, line2] = &grouped[..] else {
@ -170,4 +230,23 @@ mod tests {
        assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(1));
        assert_eq!(line2.diagnostics.len(), 1);
    }
+
+    #[derive(Debug)]
+    struct DummyDiagnostic {
+        range: TextRange,
+    }
+
+    impl Diagnostic for DummyDiagnostic {
+        fn rule(&self) -> &str {
+            "dummy"
+        }
+
+        fn message(&self) -> Cow<str> {
+            "dummy".into()
+        }
+
+        fn range(&self) -> TextRange {
+            self.range
+        }
+    }
 }
--- a/crates/red_knot_test/src/lib.rs
+++ b/crates/red_knot_test/src/lib.rs
@ -1,3 +1,4 @@
+use crate::diagnostic::Diagnostic;
 use colored::Colorize;
 use parser as test_parser;
 use red_knot_python_semantic::types::check_types;
@ -7,6 +8,7 @@ use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
 use ruff_source_file::LineIndex;
 use ruff_text_size::TextSize;
 use std::path::Path;
+use std::sync::Arc;

 mod assertion;
 mod db;
@ -87,16 +89,23 @@ fn run_test(db: &mut db::Db, test: &parser::MarkdownTest) -> Result<(), Failures
        .filter_map(|test_file| {
            let parsed = parsed_module(db, test_file.file);

-            // TODO allow testing against code with syntax errors
-            assert!(
-                parsed.errors().is_empty(),
-                "Python syntax errors in {}, {}: {:?}",
-                test.name(),
-                test_file.file.path(db),
-                parsed.errors()
-            );
+            let mut diagnostics: Vec<Box<_>> = parsed
+                .errors()
+                .iter()
+                .cloned()
+                .map(|error| {
+                    let diagnostic: Box<dyn Diagnostic> = Box::new(error);
+                    diagnostic
+                })
+                .collect();

-            match matcher::match_file(db, test_file.file, check_types(db, test_file.file)) {
+            let type_diagnostics = check_types(db, test_file.file);
+            diagnostics.extend(type_diagnostics.into_iter().map(|diagnostic| {
+                let diagnostic: Box<dyn Diagnostic> = Box::new(Arc::unwrap_or_clone(diagnostic));
+                diagnostic
+            }));
+
+            match matcher::match_file(db, test_file.file, diagnostics) {
                Ok(()) => None,
                Err(line_failures) => Some(FileFailures {
                    backtick_offset: test_file.backtick_offset,
--- a/crates/red_knot_test/src/matcher.rs
+++ b/crates/red_knot_test/src/matcher.rs
@ -1,17 +1,14 @@
-//! Match [`TypeCheckDiagnostic`]s against [`Assertion`]s and produce test failure messages for any
+//! Match [`Diagnostic`]s against [`Assertion`]s and produce test failure messages for any
 //! mismatches.
 use crate::assertion::{Assertion, ErrorAssertion, InlineFileAssertions};
 use crate::db::Db;
-use crate::diagnostic::SortedDiagnostics;
+use crate::diagnostic::{Diagnostic, SortedDiagnostics};
 use colored::Colorize;
-use red_knot_python_semantic::types::TypeCheckDiagnostic;
 use ruff_db::files::File;
 use ruff_db::source::{line_index, source_text, SourceText};
 use ruff_source_file::{LineIndex, OneIndexed};
-use ruff_text_size::Ranged;
 use std::cmp::Ordering;
 use std::ops::Range;
-use std::sync::Arc;

 #[derive(Debug, Default)]
 pub(super) struct FailuresByLine {
@ -55,7 +52,7 @@ pub(super) fn match_file<T>(
    diagnostics: impl IntoIterator<Item = T>,
 ) -> Result<(), FailuresByLine>
 where
-    T: Diagnostic + Clone,
+    T: Diagnostic,
 {
    // Parse assertions from comments in the file, and get diagnostics from the file; both
    // ordered by line number.
@ -126,22 +123,6 @@ where
    }
 }

-pub(super) trait Diagnostic: Ranged {
-    fn rule(&self) -> &str;
-
-    fn message(&self) -> &str;
-}
-
-impl Diagnostic for Arc<TypeCheckDiagnostic> {
-    fn rule(&self) -> &str {
-        self.as_ref().rule()
-    }
-
-    fn message(&self) -> &str {
-        self.as_ref().message()
-    }
-}
-
 trait Unmatched {
    fn unmatched(&self) -> String;
 }
@ -253,9 +234,9 @@ impl Matcher {
        }
    }

-    fn column<T: Ranged>(&self, ranged: &T) -> OneIndexed {
+    fn column<T: Diagnostic>(&self, diagnostic: &T) -> OneIndexed {
        self.line_index
-            .source_location(ranged.start(), &self.source)
+            .source_location(diagnostic.range().start(), &self.source)
            .column
    }

@ -323,11 +304,13 @@ impl Matcher {
 #[cfg(test)]
 mod tests {
    use super::FailuresByLine;
+    use crate::diagnostic::Diagnostic;
    use ruff_db::files::system_path_to_file;
    use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
    use ruff_python_trivia::textwrap::dedent;
    use ruff_source_file::OneIndexed;
-    use ruff_text_size::{Ranged, TextRange};
+    use ruff_text_size::TextRange;
+    use std::borrow::Cow;

    #[derive(Clone, Debug)]
    struct TestDiagnostic {
@ -347,18 +330,16 @@ mod tests {
        }
    }

-    impl super::Diagnostic for TestDiagnostic {
+    impl Diagnostic for TestDiagnostic {
        fn rule(&self) -> &str {
            self.rule
        }

-        fn message(&self) -> &str {
-            self.message
-        }
+        fn message(&self) -> Cow<str> {
+            self.message.into()
        }

-    impl Ranged for TestDiagnostic {
-        fn range(&self) -> ruff_text_size::TextRange {
+        fn range(&self) -> TextRange {
            self.range
        }
    }