//! Parse type and type-error assertions in Python comment form.
//!
//! Parses comments of the form `# revealed: SomeType` and `# error: 8 [rule-code] "message text"`.
//! In the latter case, the `8` is a column number, and `"message text"` asserts that the full
//! diagnostic message contains the text `"message text"`. Each of the three parts may be omitted,
//! but at least a rule code or a message text must be given; a bare `# error:` is rejected when
//! the assertion is parsed.
//!
//! Assertion comments may be placed at end-of-line:
//!
//! ```py
//! x: int = "foo" # error: [invalid-assignment]
//! ```
//!
//! Or as a full-line comment on the preceding line:
//!
//! ```py
//! # error: [invalid-assignment]
//! x: int = "foo"
//! ```
//!
//! Multiple assertion comments may apply to the same line; in this case all (or all but the last)
//! must be full-line comments:
//!
//! ```py
//! # error: [unbound-name]
//! reveal_type(x) # revealed: Unbound
//! ```
//!
//! or
//!
//! ```py
//! # error: [unbound-name]
//! # revealed: Unbound
//! reveal_type(x)
//! ```

use crate::db::Db;
use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_db::source::{SourceText, line_index, source_text};
use ruff_python_trivia::{CommentRanges, Cursor};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_text_size::{Ranged, TextRange, TextSize};
use smallvec::SmallVec;
use std::ops::Deref;
use std::str::FromStr;

/// Diagnostic assertion comments in a single embedded file.
#[derive(Debug)]
pub(crate) struct InlineFileAssertions {
    /// Ranges of the comments in the file, built from the parsed module's tokens.
    comment_ranges: CommentRanges,
    /// Source text of the file; comment ranges are sliced out of it.
    source: SourceText,
    /// Line index used to translate text offsets into line numbers.
    lines: LineIndex,
}

impl InlineFileAssertions {
    /// Gathers the source text, line index and comment ranges of `file`.
    pub(crate) fn from_file(db: &Db, file: File) -> Self {
        let source = source_text(db, file);
        let lines = line_index(db, file);
        let parsed = parsed_module(db, file).load(db);
        let comment_ranges = CommentRanges::from(parsed.tokens());
        Self {
            comment_ranges,
            source,
            lines,
        }
    }

    /// Returns the line on which `range` starts.
    fn line_number(&self, range: &impl Ranged) -> OneIndexed {
        self.lines.line_index(range.start())
    }

    /// Returns whether the assertion's comment is a full-line ("own line") comment,
    /// as decided by [`CommentRanges::is_own_line`].
    fn is_own_line_comment(&self, ranged_assertion: &AssertionWithRange) -> bool {
        CommentRanges::is_own_line(ranged_assertion.start(), self.source.as_str())
    }
}

impl<'a> IntoIterator for &'a InlineFileAssertions {
    type Item = LineAssertions<'a>;
    type IntoIter = LineAssertionsIterator<'a>;

    /// Iterates over the assertions of the file, grouped by the line of code they refer to.
    fn into_iter(self) -> Self::IntoIter {
        let assertions = AssertionWithRangeIterator {
            file_assertions: self,
            inner: self.comment_ranges.into_iter(),
        };
        LineAssertionsIterator {
            file_assertions: self,
            // Grouping needs one assertion of lookahead.
            inner: assertions.peekable(),
        }
    }
}

/// An [`UnparsedAssertion`] with the [`TextRange`] of its original inline comment.
#[derive(Debug)]
struct AssertionWithRange<'a>(UnparsedAssertion<'a>, TextRange);

impl<'a> Deref for AssertionWithRange<'a> {
    type Target = UnparsedAssertion<'a>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl Ranged for AssertionWithRange<'_> {
    fn range(&self) -> TextRange {
        self.1
    }
}

impl<'a> From<AssertionWithRange<'a>> for UnparsedAssertion<'a> {
    /// Drops the comment range and keeps only the assertion.
    fn from(value: AssertionWithRange<'a>) -> Self {
        value.0
    }
}

/// Iterator that yields all assertions within a single embedded Python file.
#[derive(Debug)] struct AssertionWithRangeIterator<'a> { file_assertions: &'a InlineFileAssertions, inner: std::iter::Copied>, } impl<'a> Iterator for AssertionWithRangeIterator<'a> { type Item = AssertionWithRange<'a>; fn next(&mut self) -> Option { loop { let inner_next = self.inner.next()?; let comment = &self.file_assertions.source[inner_next]; if let Some(assertion) = UnparsedAssertion::from_comment(comment) { return Some(AssertionWithRange(assertion, inner_next)); } } } } impl std::iter::FusedIterator for AssertionWithRangeIterator<'_> {} /// A vector of [`UnparsedAssertion`]s belonging to a single line. /// /// Most lines will have zero or one assertion, so we use a [`SmallVec`] optimized for a single /// element to avoid most heap vector allocations. type AssertionVec<'a> = SmallVec<[UnparsedAssertion<'a>; 1]>; #[derive(Debug)] pub(crate) struct LineAssertionsIterator<'a> { file_assertions: &'a InlineFileAssertions, inner: std::iter::Peekable>, } impl<'a> Iterator for LineAssertionsIterator<'a> { type Item = LineAssertions<'a>; fn next(&mut self) -> Option { let file = self.file_assertions; let ranged_assertion = self.inner.next()?; let mut collector = AssertionVec::new(); let mut line_number = file.line_number(&ranged_assertion); // Collect all own-line comments on consecutive lines; these all apply to the same line of // code. 
For example: // // ```py // # error: [unbound-name] // # revealed: Unbound // reveal_type(x) // ``` // if file.is_own_line_comment(&ranged_assertion) { collector.push(ranged_assertion.into()); let mut only_own_line = true; while let Some(ranged_assertion) = self.inner.peek() { let next_line_number = line_number.saturating_add(1); if file.line_number(ranged_assertion) == next_line_number { if !file.is_own_line_comment(ranged_assertion) { only_own_line = false; } line_number = next_line_number; collector.push(self.inner.next().unwrap().into()); // If we see an end-of-line comment, it has to be the end of the stack, // otherwise we'd botch this case, attributing all three errors to the `bar` // line: // // ```py // # error: // foo # error: // bar # error: // ``` // if !only_own_line { break; } } else { break; } } if only_own_line { // The collected comments apply to the _next_ line in the code. line_number = line_number.saturating_add(1); } } else { // We have a line-trailing comment; it applies to its own line, and is not grouped. collector.push(ranged_assertion.into()); } Some(LineAssertions { line_number, assertions: collector, }) } } impl std::iter::FusedIterator for LineAssertionsIterator<'_> {} /// One or more assertions referring to the same line of code. #[derive(Debug)] pub(crate) struct LineAssertions<'a> { /// The line these assertions refer to. /// /// Not necessarily the same line the assertion comment is located on; for an own-line comment, /// it's the next non-assertion line. pub(crate) line_number: OneIndexed, /// The assertions referring to this line. pub(crate) assertions: AssertionVec<'a>, } impl<'a> Deref for LineAssertions<'a> { type Target = [UnparsedAssertion<'a>]; fn deref(&self) -> &Self::Target { &self.assertions } } /// A single diagnostic assertion comment. /// /// This type represents an *attempted* assertion, but not necessarily a *valid* assertion. 
/// Parsing is done lazily in `matcher.rs`; this allows us to emit nicer error messages /// in the event of an invalid assertion #[derive(Debug)] pub(crate) enum UnparsedAssertion<'a> { /// A `# revealed:` assertion. Revealed(&'a str), /// An `# error:` assertion. Error(&'a str), } impl<'a> UnparsedAssertion<'a> { /// Returns `Some(_)` if the comment starts with `# error:` or `# revealed:`, /// indicating that it is an assertion comment. fn from_comment(comment: &'a str) -> Option { let comment = comment.trim().strip_prefix('#')?.trim(); let (keyword, body) = comment.split_once(':')?; let keyword = keyword.trim(); let body = body.trim(); match keyword { "revealed" => Some(Self::Revealed(body)), "error" => Some(Self::Error(body)), _ => None, } } /// Parse the attempted assertion into a [`ParsedAssertion`] structured representation. pub(crate) fn parse(&self) -> Result, PragmaParseError<'a>> { match self { Self::Revealed(revealed) => { if revealed.is_empty() { Err(PragmaParseError::EmptyRevealTypeAssertion) } else { Ok(ParsedAssertion::Revealed(revealed)) } } Self::Error(error) => ErrorAssertion::from_str(error) .map(ParsedAssertion::Error) .map_err(PragmaParseError::ErrorAssertionParseError), } } } impl std::fmt::Display for UnparsedAssertion<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Revealed(expected_type) => write!(f, "revealed: {expected_type}"), Self::Error(assertion) => write!(f, "error: {assertion}"), } } } /// An assertion comment that has been parsed and validated for correctness. #[derive(Debug)] pub(crate) enum ParsedAssertion<'a> { /// A `# revealed:` assertion. Revealed(&'a str), /// An `# error:` assertion. 
Error(ErrorAssertion<'a>), } impl std::fmt::Display for ParsedAssertion<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Revealed(expected_type) => write!(f, "revealed: {expected_type}"), Self::Error(assertion) => assertion.fmt(f), } } } /// A parsed and validated `# error:` assertion comment. #[derive(Debug)] pub(crate) struct ErrorAssertion<'a> { /// The diagnostic rule code we expect. pub(crate) rule: Option<&'a str>, /// The column we expect the diagnostic range to start at. pub(crate) column: Option, /// A string we expect to be contained in the diagnostic message. pub(crate) message_contains: Option<&'a str>, } impl<'a> ErrorAssertion<'a> { fn from_str(source: &'a str) -> Result> { ErrorAssertionParser::new(source).parse() } } impl std::fmt::Display for ErrorAssertion<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("error:")?; if let Some(column) = self.column { write!(f, " {column}")?; } if let Some(rule) = self.rule { write!(f, " [{rule}]")?; } if let Some(message) = self.message_contains { write!(f, r#" "{message}""#)?; } Ok(()) } } /// A parser to convert a string into a [`ErrorAssertion`]. #[derive(Debug, Clone)] struct ErrorAssertionParser<'a> { cursor: Cursor<'a>, /// string slice representing all characters *after* the `# error:` prefix. comment_source: &'a str, } impl<'a> ErrorAssertionParser<'a> { fn new(comment: &'a str) -> Self { Self { cursor: Cursor::new(comment), comment_source: comment, } } /// Consume characters in the assertion comment until we find a non-whitespace character fn skip_whitespace(&mut self) { self.cursor.eat_while(char::is_whitespace); } /// Attempt to parse the assertion comment into a [`ErrorAssertion`]. 
fn parse(mut self) -> Result, ErrorAssertionParseError<'a>> { let mut column = None; let mut rule = None; self.skip_whitespace(); while let Some(character) = self.cursor.bump() { match character { // column number '0'..='9' => { if column.is_some() { return Err(ErrorAssertionParseError::MultipleColumnNumbers); } if rule.is_some() { return Err(ErrorAssertionParseError::ColumnNumberAfterRuleCode); } let offset = self.cursor.offset() - TextSize::new(1); self.cursor.eat_while(|c| !c.is_whitespace()); let column_str = &self.comment_source[TextRange::new(offset, self.cursor.offset())]; column = OneIndexed::from_str(column_str) .map(Some) .map_err(|e| ErrorAssertionParseError::BadColumnNumber(column_str, e))?; } // rule code '[' => { if rule.is_some() { return Err(ErrorAssertionParseError::MultipleRuleCodes); } let offset = self.cursor.offset(); self.cursor.eat_while(|c| c != ']'); if self.cursor.is_eof() { return Err(ErrorAssertionParseError::UnclosedRuleCode); } rule = Some( self.comment_source[TextRange::new(offset, self.cursor.offset())].trim(), ); self.cursor.bump(); } // message text '"' => { let comment_source = self.comment_source.trim(); return if comment_source.ends_with('"') { let rest = &comment_source [self.cursor.offset().to_usize()..comment_source.len() - 1]; Ok(ErrorAssertion { rule, column, message_contains: Some(rest), }) } else { Err(ErrorAssertionParseError::UnclosedMessage) }; } // Some other assumptions we make don't hold true if we hit this branch: '\n' | '\r' => { unreachable!("Assertion comments should never contain newlines") } // something else (bad!)... unexpected => { return Err(ErrorAssertionParseError::UnexpectedCharacter { character: unexpected, offset: self.cursor.offset().to_usize(), }); } } self.skip_whitespace(); } if rule.is_some() { Ok(ErrorAssertion { rule, column, message_contains: None, }) } else { Err(ErrorAssertionParseError::NoRuleOrMessage) } } } /// Enumeration of ways in which parsing an assertion comment can fail. 
/// /// The assertion comment could be either a "revealed" assertion or an "error" assertion. #[derive(Debug, thiserror::Error)] pub(crate) enum PragmaParseError<'a> { #[error("Must specify which type should be revealed")] EmptyRevealTypeAssertion, #[error("{0}")] ErrorAssertionParseError(ErrorAssertionParseError<'a>), } /// Enumeration of ways in which parsing an *error* assertion comment can fail. #[derive(Debug, thiserror::Error)] pub(crate) enum ErrorAssertionParseError<'a> { #[error("no rule or message text")] NoRuleOrMessage, #[error("bad column number `{0}`")] BadColumnNumber(&'a str, #[source] std::num::ParseIntError), #[error("column number must precede the rule code")] ColumnNumberAfterRuleCode, #[error("multiple column numbers in one assertion")] MultipleColumnNumbers, #[error("expected ']' to close rule code")] UnclosedRuleCode, #[error("cannot use multiple rule codes in one assertion")] MultipleRuleCodes, #[error("expected '\"' to be the final character in an assertion with an error message")] UnclosedMessage, #[error( "unexpected character `{character}` at offset {offset} (relative to the `:` in the assertion comment)" )] UnexpectedCharacter { character: char, offset: usize }, } #[cfg(test)] mod tests { use super::*; use ruff_db::system::DbWithWritableSystem as _; use ruff_db::{Db as _, files::system_path_to_file}; use ruff_python_trivia::textwrap::dedent; use ruff_source_file::OneIndexed; use ty_python_semantic::{ Program, ProgramSettings, PythonPlatform, PythonVersionWithSource, SearchPathSettings, }; fn get_assertions(source: &str) -> InlineFileAssertions { let mut db = Db::setup(); let settings = ProgramSettings { python_version: PythonVersionWithSource::default(), python_platform: PythonPlatform::default(), search_paths: SearchPathSettings::new(Vec::new()) .to_search_paths(db.system(), db.vendored()) .unwrap(), }; Program::init_or_update(&mut db, settings); db.write_file("/src/test.py", source).unwrap(); let file = system_path_to_file(&db, 
"/src/test.py").unwrap(); InlineFileAssertions::from_file(&db, file) } fn as_vec(assertions: &InlineFileAssertions) -> Vec { assertions.into_iter().collect() } #[test] fn ty_display() { let assertions = get_assertions(&dedent( " reveal_type(1) # revealed: Literal[1] ", )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!(format!("{assert}"), "revealed: Literal[1]"); } #[test] fn error() { let assertions = get_assertions(&dedent( " x # error: ", )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!(format!("{assert}"), "error: "); } #[test] fn prior_line() { let assertions = get_assertions(&dedent( " # revealed: Literal[1] reveal_type(1) ", )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!(format!("{assert}"), "revealed: Literal[1]"); } #[test] fn stacked_prior_line() { let assertions = get_assertions(&dedent( " # revealed: Unbound # error: [unbound-name] reveal_type(x) ", )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(3)); let [assert1, assert2] = &line.assertions[..] else { panic!("expected two assertions"); }; assert_eq!(format!("{assert1}"), "revealed: Unbound"); assert_eq!(format!("{assert2}"), "error: [unbound-name]"); } #[test] fn stacked_mixed() { let assertions = get_assertions(&dedent( " # revealed: Unbound reveal_type(x) # error: [unbound-name] ", )); let [line] = &as_vec(&assertions)[..] 
else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2)); let [assert1, assert2] = &line.assertions[..] else { panic!("expected two assertions"); }; assert_eq!(format!("{assert1}"), "revealed: Unbound"); assert_eq!(format!("{assert2}"), "error: [unbound-name]"); } #[test] fn multiple_lines() { let assertions = get_assertions(&dedent( r#" # error: [invalid-assignment] x: int = "foo" y # error: [unbound-name] "#, )); let [line1, line2] = &as_vec(&assertions)[..] else { panic!("expected two lines"); }; assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(2)); assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(3)); let [UnparsedAssertion::Error(error1)] = &line1.assertions[..] else { panic!("expected one error assertion"); }; let error1 = ErrorAssertion::from_str(error1).unwrap(); assert_eq!(error1.rule, Some("invalid-assignment")); let [UnparsedAssertion::Error(error2)] = &line2.assertions[..] else { panic!("expected one error assertion"); }; let error2 = ErrorAssertion::from_str(error2).unwrap(); assert_eq!(error2.rule, Some("unbound-name")); } #[test] fn multiple_lines_mixed_stack() { let assertions = get_assertions(&dedent( r#" # error: [invalid-assignment] x: int = reveal_type("foo") # revealed: str y # error: [unbound-name] "#, )); let [line1, line2] = &as_vec(&assertions)[..] else { panic!("expected two lines"); }; assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(2)); assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(3)); let [ UnparsedAssertion::Error(error1), UnparsedAssertion::Revealed(expected_ty), ] = &line1.assertions[..] else { panic!("expected one error assertion and one Revealed assertion"); }; let error1 = ErrorAssertion::from_str(error1).unwrap(); assert_eq!(error1.rule, Some("invalid-assignment")); assert_eq!(expected_ty.trim(), "str"); let [UnparsedAssertion::Error(error2)] = &line2.assertions[..] 
else { panic!("expected one error assertion"); }; let error2 = ErrorAssertion::from_str(error2).unwrap(); assert_eq!(error2.rule, Some("unbound-name")); } #[test] fn error_with_rule() { let assertions = get_assertions(&dedent( " x # error: [unbound-name] ", )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!(format!("{assert}"), "error: [unbound-name]"); } #[test] fn error_with_rule_and_column() { let assertions = get_assertions(&dedent( " x # error: 1 [unbound-name] ", )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!(format!("{assert}"), "error: 1 [unbound-name]"); } #[test] fn error_with_rule_and_message() { let assertions = get_assertions(&dedent( r#" # error: [unbound-name] "`x` is unbound" x "#, )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!( format!("{assert}"), r#"error: [unbound-name] "`x` is unbound""# ); } #[test] fn error_with_message_and_column() { let assertions = get_assertions(&dedent( r#" # error: 1 "`x` is unbound" x "#, )); let [line] = &as_vec(&assertions)[..] else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!(format!("{assert}"), r#"error: 1 "`x` is unbound""#); } #[test] fn error_with_rule_and_message_and_column() { let assertions = get_assertions(&dedent( r#" # error: 1 [unbound-name] "`x` is unbound" x "#, )); let [line] = &as_vec(&assertions)[..] 
else { panic!("expected one line"); }; assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2)); let [assert] = &line.assertions[..] else { panic!("expected one assertion"); }; assert_eq!( format!("{assert}"), r#"error: 1 [unbound-name] "`x` is unbound""# ); } }