From a56ee9268e36b9532b44c9ac58f1b4a33c2ec01a Mon Sep 17 00:00:00 2001
From: Micha Reiser <micha@reiser.io>
Date: Wed, 6 Nov 2024 12:25:52 +0100
Subject: [PATCH] Add mdtest support for files with invalid syntax (#14126)

---
 .pre-commit-config.yaml                       |  4 +
 Cargo.lock                                    |  1 +
 .../exception/invalid_exception_syntax.md     | 13 +++
 .../src/types/diagnostic.rs                   |  2 +-
 .../src/types/infer.rs                        | 23 +----
 crates/red_knot_test/Cargo.toml               |  1 +
 crates/red_knot_test/src/diagnostic.rs        | 93 +++++++++++++++++--
 crates/red_knot_test/src/lib.rs               | 27 ++++--
 crates/red_knot_test/src/matcher.rs           | 43 +++------
 9 files changed, 137 insertions(+), 70 deletions(-)
 create mode 100644 crates/red_knot_python_semantic/resources/mdtest/exception/invalid_exception_syntax.md

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1bd1389265..9db02bb6d2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -51,6 +51,10 @@ repos:
       - id: blacken-docs
         args: ["--pyi", "--line-length", "130"]
         files: '^crates/.*/resources/mdtest/.*\.md'
+        exclude: |
+          (?x)^(
+            .*?invalid(_.+)_syntax.md
+          )$
         additional_dependencies:
           - black==24.10.0
 
diff --git a/Cargo.lock b/Cargo.lock
index 1a09eb3feb..4343a2d5ed 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2173,6 +2173,7 @@ dependencies = [
  "regex",
  "ruff_db",
  "ruff_index",
+ "ruff_python_parser",
  "ruff_python_trivia",
  "ruff_source_file",
  "ruff_text_size",
diff --git a/crates/red_knot_python_semantic/resources/mdtest/exception/invalid_exception_syntax.md b/crates/red_knot_python_semantic/resources/mdtest/exception/invalid_exception_syntax.md
new file mode 100644
index 0000000000..fcf1780f1d
--- /dev/null
+++ b/crates/red_knot_python_semantic/resources/mdtest/exception/invalid_exception_syntax.md
@@ -0,0 +1,13 @@
+# Exception Handling
+
+## Invalid syntax
+
+```py
+from typing_extensions import reveal_type
+
+try:
+    print
+except as e:  # error: [invalid-syntax]
+    reveal_type(e)  # revealed: Unknown
+
+```
diff --git a/crates/red_knot_python_semantic/src/types/diagnostic.rs b/crates/red_knot_python_semantic/src/types/diagnostic.rs
index eb2ae8b2e7..35458f8a28 100644
--- a/crates/red_knot_python_semantic/src/types/diagnostic.rs
+++ b/crates/red_knot_python_semantic/src/types/diagnostic.rs
@@ -8,7 +8,7 @@ use std::sync::Arc;
 use crate::types::{ClassLiteralType, Type};
 use crate::Db;
 
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Eq, PartialEq, Clone)]
 pub struct TypeCheckDiagnostic {
     // TODO: Don't use string keys for rules
     pub(super) rule: String,
diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs
index 9465542a87..3864efcea5 100644
--- a/crates/red_knot_python_semantic/src/types/infer.rs
+++ b/crates/red_knot_python_semantic/src/types/infer.rs
@@ -5061,27 +5061,6 @@ mod tests {
         Ok(())
     }
 
-    #[test]
-    fn exception_handler_with_invalid_syntax() -> anyhow::Result<()> {
-        let mut db = setup_db();
-
-        db.write_dedented(
-            "src/a.py",
-            "
-            from typing_extensions import reveal_type
-
-            try:
-                print
-            except as e:
-                reveal_type(e)
-            ",
-        )?;
-
-        assert_file_diagnostics(&db, "src/a.py", &["Revealed type is `Unknown`"]);
-
-        Ok(())
-    }
-
     #[test]
     fn basic_comprehension() -> anyhow::Result<()> {
         let mut db = setup_db();
@@ -5424,7 +5403,7 @@ mod tests {
                     return 42
 
             class Iterable:
-                def __iter__(self) -> Iterator:
+                def __iter__(self) -> Iterator: ...
 
             x = [*NotIterable()]
             y = [*Iterable()]
diff --git a/crates/red_knot_test/Cargo.toml b/crates/red_knot_test/Cargo.toml
index 68c7f13e96..76668f8996 100644
--- a/crates/red_knot_test/Cargo.toml
+++ b/crates/red_knot_test/Cargo.toml
@@ -15,6 +15,7 @@ red_knot_python_semantic = { workspace = true }
 red_knot_vendored = { workspace = true }
 ruff_db = { workspace = true }
 ruff_index = { workspace = true }
+ruff_python_parser = { workspace = true }
 ruff_python_trivia = { workspace = true }
 ruff_source_file = { workspace = true }
 ruff_text_size = { workspace = true }
diff --git a/crates/red_knot_test/src/diagnostic.rs b/crates/red_knot_test/src/diagnostic.rs
index 56e4a87906..368777b679 100644
--- a/crates/red_knot_test/src/diagnostic.rs
+++ b/crates/red_knot_test/src/diagnostic.rs
@@ -2,10 +2,63 @@
 //!
 //! We don't assume that we will get the diagnostics in source order.
 
+use red_knot_python_semantic::types::TypeCheckDiagnostic;
+use ruff_python_parser::ParseError;
 use ruff_source_file::{LineIndex, OneIndexed};
-use ruff_text_size::Ranged;
+use ruff_text_size::{Ranged, TextRange};
+use std::borrow::Cow;
 use std::ops::{Deref, Range};
 
+pub(super) trait Diagnostic: std::fmt::Debug {
+    fn rule(&self) -> &str;
+
+    fn message(&self) -> Cow<str>;
+
+    fn range(&self) -> TextRange;
+}
+
+impl Diagnostic for TypeCheckDiagnostic {
+    fn rule(&self) -> &str {
+        TypeCheckDiagnostic::rule(self)
+    }
+
+    fn message(&self) -> Cow<str> {
+        TypeCheckDiagnostic::message(self).into()
+    }
+
+    fn range(&self) -> TextRange {
+        Ranged::range(self)
+    }
+}
+
+impl Diagnostic for ParseError {
+    fn rule(&self) -> &str {
+        "invalid-syntax"
+    }
+
+    fn message(&self) -> Cow<str> {
+        self.error.to_string().into()
+    }
+
+    fn range(&self) -> TextRange {
+        self.location
+    }
+}
+
+impl Diagnostic for Box<dyn Diagnostic> {
+    fn rule(&self) -> &str {
+        (**self).rule()
+    }
+
+    fn message(&self) -> Cow<str> {
+        (**self).message()
+    }
+
+    fn range(&self) -> TextRange {
+        (**self).range()
+    }
+}
+
 /// All diagnostics for one embedded Python file, sorted and grouped by start line number.
 ///
 /// The diagnostics are kept in a flat vector, sorted by line number. A separate vector of
@@ -19,13 +72,13 @@ pub(crate) struct SortedDiagnostics<T> {
 
 impl<T> SortedDiagnostics<T>
 where
-    T: Ranged + Clone,
+    T: Diagnostic,
 {
     pub(crate) fn new(diagnostics: impl IntoIterator<Item = T>, line_index: &LineIndex) -> Self {
         let mut diagnostics: Vec<_> = diagnostics
             .into_iter()
             .map(|diagnostic| DiagnosticWithLine {
-                line_number: line_index.line_index(diagnostic.start()),
+                line_number: line_index.line_index(diagnostic.range().start()),
                 diagnostic,
             })
             .collect();
@@ -94,7 +147,7 @@ pub(crate) struct LineDiagnosticsIterator<'a, T> {
 
 impl<'a, T> Iterator for LineDiagnosticsIterator<'a, T>
 where
-    T: Ranged + Clone,
+    T: Diagnostic,
 {
     type Item = LineDiagnostics<'a, T>;
 
@@ -110,7 +163,7 @@ where
     }
 }
 
-impl<T> std::iter::FusedIterator for LineDiagnosticsIterator<'_, T> where T: Clone + Ranged {}
+impl<T> std::iter::FusedIterator for LineDiagnosticsIterator<'_, T> where T: Diagnostic {}
 
 /// All diagnostics that start on a single line of source code in one embedded Python file.
 #[derive(Debug)]
@@ -139,11 +192,13 @@ struct DiagnosticWithLine<T> {
 #[cfg(test)]
 mod tests {
     use crate::db::Db;
+    use crate::diagnostic::Diagnostic;
     use ruff_db::files::system_path_to_file;
     use ruff_db::source::line_index;
     use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
     use ruff_source_file::OneIndexed;
     use ruff_text_size::{TextRange, TextSize};
+    use std::borrow::Cow;
 
     #[test]
     fn sort_and_group() {
@@ -152,13 +207,18 @@ mod tests {
         let file = system_path_to_file(&db, "/src/test.py").unwrap();
         let lines = line_index(&db, file);
 
-        let ranges = vec![
+        let ranges = [
             TextRange::new(TextSize::new(0), TextSize::new(1)),
             TextRange::new(TextSize::new(5), TextSize::new(10)),
             TextRange::new(TextSize::new(1), TextSize::new(7)),
         ];
 
-        let sorted = super::SortedDiagnostics::new(&ranges, &lines);
+        let diagnostics: Vec<_> = ranges
+            .into_iter()
+            .map(|range| DummyDiagnostic { range })
+            .collect();
+
+        let sorted = super::SortedDiagnostics::new(diagnostics, &lines);
         let grouped = sorted.iter_lines().collect::<Vec<_>>();
 
         let [line1, line2] = &grouped[..] else {
@@ -170,4 +230,23 @@ mod tests {
         assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(1));
         assert_eq!(line2.diagnostics.len(), 1);
     }
+
+    #[derive(Debug)]
+    struct DummyDiagnostic {
+        range: TextRange,
+    }
+
+    impl Diagnostic for DummyDiagnostic {
+        fn rule(&self) -> &str {
+            "dummy"
+        }
+
+        fn message(&self) -> Cow<str> {
+            "dummy".into()
+        }
+
+        fn range(&self) -> TextRange {
+            self.range
+        }
+    }
 }
diff --git a/crates/red_knot_test/src/lib.rs b/crates/red_knot_test/src/lib.rs
index cd49a98041..0022e20c55 100644
--- a/crates/red_knot_test/src/lib.rs
+++ b/crates/red_knot_test/src/lib.rs
@@ -1,3 +1,4 @@
+use crate::diagnostic::Diagnostic;
 use colored::Colorize;
 use parser as test_parser;
 use red_knot_python_semantic::types::check_types;
@@ -7,6 +8,7 @@ use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
 use ruff_source_file::LineIndex;
 use ruff_text_size::TextSize;
 use std::path::Path;
+use std::sync::Arc;
 
 mod assertion;
 mod db;
@@ -87,16 +89,23 @@ fn run_test(db: &mut db::Db, test: &parser::MarkdownTest) -> Result<(), Failures
         .filter_map(|test_file| {
             let parsed = parsed_module(db, test_file.file);
 
-            // TODO allow testing against code with syntax errors
-            assert!(
-                parsed.errors().is_empty(),
-                "Python syntax errors in {}, {}: {:?}",
-                test.name(),
-                test_file.file.path(db),
-                parsed.errors()
-            );
+            let mut diagnostics: Vec<Box<_>> = parsed
+                .errors()
+                .iter()
+                .cloned()
+                .map(|error| {
+                    let diagnostic: Box<dyn Diagnostic> = Box::new(error);
+                    diagnostic
+                })
+                .collect();
 
-            match matcher::match_file(db, test_file.file, check_types(db, test_file.file)) {
+            let type_diagnostics = check_types(db, test_file.file);
+            diagnostics.extend(type_diagnostics.into_iter().map(|diagnostic| {
+                let diagnostic: Box<dyn Diagnostic> = Box::new(Arc::unwrap_or_clone(diagnostic));
+                diagnostic
+            }));
+
+            match matcher::match_file(db, test_file.file, diagnostics) {
                 Ok(()) => None,
                 Err(line_failures) => Some(FileFailures {
                     backtick_offset: test_file.backtick_offset,
diff --git a/crates/red_knot_test/src/matcher.rs b/crates/red_knot_test/src/matcher.rs
index fbc3d9ba77..fcafaba040 100644
--- a/crates/red_knot_test/src/matcher.rs
+++ b/crates/red_knot_test/src/matcher.rs
@@ -1,17 +1,14 @@
-//! Match [`TypeCheckDiagnostic`]s against [`Assertion`]s and produce test failure messages for any
+//! Match [`Diagnostic`]s against [`Assertion`]s and produce test failure messages for any
 //! mismatches.
 use crate::assertion::{Assertion, ErrorAssertion, InlineFileAssertions};
 use crate::db::Db;
-use crate::diagnostic::SortedDiagnostics;
+use crate::diagnostic::{Diagnostic, SortedDiagnostics};
 use colored::Colorize;
-use red_knot_python_semantic::types::TypeCheckDiagnostic;
 use ruff_db::files::File;
 use ruff_db::source::{line_index, source_text, SourceText};
 use ruff_source_file::{LineIndex, OneIndexed};
-use ruff_text_size::Ranged;
 use std::cmp::Ordering;
 use std::ops::Range;
-use std::sync::Arc;
 
 #[derive(Debug, Default)]
 pub(super) struct FailuresByLine {
@@ -55,7 +52,7 @@ pub(super) fn match_file<T>(
     diagnostics: impl IntoIterator<Item = T>,
 ) -> Result<(), FailuresByLine>
 where
-    T: Diagnostic + Clone,
+    T: Diagnostic,
 {
     // Parse assertions from comments in the file, and get diagnostics from the file; both
     // ordered by line number.
@@ -126,22 +123,6 @@ where
     }
 }
 
-pub(super) trait Diagnostic: Ranged {
-    fn rule(&self) -> &str;
-
-    fn message(&self) -> &str;
-}
-
-impl Diagnostic for Arc<TypeCheckDiagnostic> {
-    fn rule(&self) -> &str {
-        self.as_ref().rule()
-    }
-
-    fn message(&self) -> &str {
-        self.as_ref().message()
-    }
-}
-
 trait Unmatched {
     fn unmatched(&self) -> String;
 }
@@ -253,9 +234,9 @@ impl Matcher {
         }
     }
 
-    fn column<T: Ranged>(&self, ranged: &T) -> OneIndexed {
+    fn column<T: Diagnostic>(&self, diagnostic: &T) -> OneIndexed {
         self.line_index
-            .source_location(ranged.start(), &self.source)
+            .source_location(diagnostic.range().start(), &self.source)
             .column
     }
 
@@ -323,11 +304,13 @@ impl Matcher {
 #[cfg(test)]
 mod tests {
     use super::FailuresByLine;
+    use crate::diagnostic::Diagnostic;
     use ruff_db::files::system_path_to_file;
     use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
     use ruff_python_trivia::textwrap::dedent;
     use ruff_source_file::OneIndexed;
-    use ruff_text_size::{Ranged, TextRange};
+    use ruff_text_size::TextRange;
+    use std::borrow::Cow;
 
     #[derive(Clone, Debug)]
     struct TestDiagnostic {
@@ -347,18 +330,16 @@ mod tests {
         }
     }
 
-    impl super::Diagnostic for TestDiagnostic {
+    impl Diagnostic for TestDiagnostic {
         fn rule(&self) -> &str {
             self.rule
         }
 
-        fn message(&self) -> &str {
-            self.message
+        fn message(&self) -> Cow<str> {
+            self.message.into()
         }
-    }
 
-    impl Ranged for TestDiagnostic {
-        fn range(&self) -> ruff_text_size::TextRange {
+        fn range(&self) -> TextRange {
             self.range
         }
     }