ruff/crates/ty_test/src/assertion.rs

818 lines
25 KiB
Rust

//! Parse type and type-error assertions in Python comment form.
//!
//! Parses comments of the form `# revealed: SomeType` and `# error: 8 [rule-code] "message text"`.
//! In the latter case, the `8` is a column number, and `"message text"` asserts that the full
//! diagnostic message contains the text `"message text"`; all three are optional (`# error:` will
//! match any error.)
//!
//! Assertion comments may be placed at end-of-line:
//!
//! ```py
//! x: int = "foo" # error: [invalid-assignment]
//! ```
//!
//! Or as a full-line comment on the preceding line:
//!
//! ```py
//! # error: [invalid-assignment]
//! x: int = "foo"
//! ```
//!
//! Multiple assertion comments may apply to the same line; in this case all (or all but the last)
//! must be full-line comments:
//!
//! ```py
//! # error: [unbound-name]
//! reveal_type(x) # revealed: Unbound
//! ```
//!
//! or
//!
//! ```py
//! # error: [unbound-name]
//! # revealed: Unbound
//! reveal_type(x)
//! ```
use crate::db::Db;
use ruff_db::files::File;
use ruff_db::parsed::parsed_module;
use ruff_db::source::{SourceText, line_index, source_text};
use ruff_python_trivia::{CommentRanges, Cursor};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_text_size::{Ranged, TextRange, TextSize};
use smallvec::SmallVec;
use std::ops::Deref;
use std::str::FromStr;
/// Diagnostic assertion comments in a single embedded file.
#[derive(Debug)]
pub(crate) struct InlineFileAssertions {
comment_ranges: CommentRanges,
source: SourceText,
lines: LineIndex,
}
impl InlineFileAssertions {
pub(crate) fn from_file(db: &Db, file: File) -> Self {
let source = source_text(db, file);
let lines = line_index(db, file);
let parsed = parsed_module(db, file).load(db);
let comment_ranges = CommentRanges::from(parsed.tokens());
Self {
comment_ranges,
source,
lines,
}
}
fn line_number(&self, range: &impl Ranged) -> OneIndexed {
self.lines.line_index(range.start())
}
fn is_own_line_comment(&self, ranged_assertion: &AssertionWithRange) -> bool {
CommentRanges::is_own_line(ranged_assertion.start(), self.source.as_str())
}
}
impl<'a> IntoIterator for &'a InlineFileAssertions {
type Item = LineAssertions<'a>;
type IntoIter = LineAssertionsIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
Self::IntoIter {
file_assertions: self,
inner: AssertionWithRangeIterator {
file_assertions: self,
inner: self.comment_ranges.into_iter(),
}
.peekable(),
}
}
}
/// An [`UnparsedAssertion`] with the [`TextRange`] of its original inline comment.
#[derive(Debug)]
struct AssertionWithRange<'a>(UnparsedAssertion<'a>, TextRange);
impl<'a> Deref for AssertionWithRange<'a> {
type Target = UnparsedAssertion<'a>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Ranged for AssertionWithRange<'_> {
fn range(&self) -> TextRange {
self.1
}
}
impl<'a> From<AssertionWithRange<'a>> for UnparsedAssertion<'a> {
fn from(value: AssertionWithRange<'a>) -> Self {
value.0
}
}
/// Iterator that yields all assertions within a single embedded Python file.
#[derive(Debug)]
struct AssertionWithRangeIterator<'a> {
file_assertions: &'a InlineFileAssertions,
inner: std::iter::Copied<std::slice::Iter<'a, TextRange>>,
}
impl<'a> Iterator for AssertionWithRangeIterator<'a> {
type Item = AssertionWithRange<'a>;
fn next(&mut self) -> Option<Self::Item> {
loop {
let inner_next = self.inner.next()?;
let comment = &self.file_assertions.source[inner_next];
if let Some(assertion) = UnparsedAssertion::from_comment(comment) {
return Some(AssertionWithRange(assertion, inner_next));
}
}
}
}
impl std::iter::FusedIterator for AssertionWithRangeIterator<'_> {}
/// A vector of [`UnparsedAssertion`]s belonging to a single line.
///
/// Most lines will have zero or one assertion, so we use a [`SmallVec`] optimized for a single
/// element to avoid most heap vector allocations.
type AssertionVec<'a> = SmallVec<[UnparsedAssertion<'a>; 1]>;
#[derive(Debug)]
pub(crate) struct LineAssertionsIterator<'a> {
file_assertions: &'a InlineFileAssertions,
inner: std::iter::Peekable<AssertionWithRangeIterator<'a>>,
}
impl<'a> Iterator for LineAssertionsIterator<'a> {
type Item = LineAssertions<'a>;
fn next(&mut self) -> Option<Self::Item> {
let file = self.file_assertions;
let ranged_assertion = self.inner.next()?;
let mut collector = AssertionVec::new();
let mut line_number = file.line_number(&ranged_assertion);
// Collect all own-line comments on consecutive lines; these all apply to the same line of
// code. For example:
//
// ```py
// # error: [unbound-name]
// # revealed: Unbound
// reveal_type(x)
// ```
//
if file.is_own_line_comment(&ranged_assertion) {
collector.push(ranged_assertion.into());
let mut only_own_line = true;
while let Some(ranged_assertion) = self.inner.peek() {
let next_line_number = line_number.saturating_add(1);
if file.line_number(ranged_assertion) == next_line_number {
if !file.is_own_line_comment(ranged_assertion) {
only_own_line = false;
}
line_number = next_line_number;
collector.push(self.inner.next().unwrap().into());
// If we see an end-of-line comment, it has to be the end of the stack,
// otherwise we'd botch this case, attributing all three errors to the `bar`
// line:
//
// ```py
// # error:
// foo # error:
// bar # error:
// ```
//
if !only_own_line {
break;
}
} else {
break;
}
}
if only_own_line {
// The collected comments apply to the _next_ line in the code.
line_number = line_number.saturating_add(1);
}
} else {
// We have a line-trailing comment; it applies to its own line, and is not grouped.
collector.push(ranged_assertion.into());
}
Some(LineAssertions {
line_number,
assertions: collector,
})
}
}
impl std::iter::FusedIterator for LineAssertionsIterator<'_> {}
/// One or more assertions referring to the same line of code.
#[derive(Debug)]
pub(crate) struct LineAssertions<'a> {
/// The line these assertions refer to.
///
/// Not necessarily the same line the assertion comment is located on; for an own-line comment,
/// it's the next non-assertion line.
pub(crate) line_number: OneIndexed,
/// The assertions referring to this line.
pub(crate) assertions: AssertionVec<'a>,
}
impl<'a> Deref for LineAssertions<'a> {
type Target = [UnparsedAssertion<'a>];
fn deref(&self) -> &Self::Target {
&self.assertions
}
}
/// A single diagnostic assertion comment.
///
/// This type represents an *attempted* assertion, but not necessarily a *valid* assertion.
/// Parsing is done lazily in `matcher.rs`; this allows us to emit nicer error messages
/// in the event of an invalid assertion
#[derive(Debug)]
pub(crate) enum UnparsedAssertion<'a> {
/// A `# revealed:` assertion.
Revealed(&'a str),
/// An `# error:` assertion.
Error(&'a str),
}
impl<'a> UnparsedAssertion<'a> {
/// Returns `Some(_)` if the comment starts with `# error:` or `# revealed:`,
/// indicating that it is an assertion comment.
fn from_comment(comment: &'a str) -> Option<Self> {
let comment = comment.trim().strip_prefix('#')?.trim();
let (keyword, body) = comment.split_once(':')?;
let keyword = keyword.trim();
let body = body.trim();
match keyword {
"revealed" => Some(Self::Revealed(body)),
"error" => Some(Self::Error(body)),
_ => None,
}
}
/// Parse the attempted assertion into a [`ParsedAssertion`] structured representation.
pub(crate) fn parse(&self) -> Result<ParsedAssertion<'a>, PragmaParseError<'a>> {
match self {
Self::Revealed(revealed) => {
if revealed.is_empty() {
Err(PragmaParseError::EmptyRevealTypeAssertion)
} else {
Ok(ParsedAssertion::Revealed(revealed))
}
}
Self::Error(error) => ErrorAssertion::from_str(error)
.map(ParsedAssertion::Error)
.map_err(PragmaParseError::ErrorAssertionParseError),
}
}
}
impl std::fmt::Display for UnparsedAssertion<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Revealed(expected_type) => write!(f, "revealed: {expected_type}"),
Self::Error(assertion) => write!(f, "error: {assertion}"),
}
}
}
/// An assertion comment that has been parsed and validated for correctness.
#[derive(Debug)]
pub(crate) enum ParsedAssertion<'a> {
/// A `# revealed:` assertion.
Revealed(&'a str),
/// An `# error:` assertion.
Error(ErrorAssertion<'a>),
}
impl std::fmt::Display for ParsedAssertion<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Revealed(expected_type) => write!(f, "revealed: {expected_type}"),
Self::Error(assertion) => assertion.fmt(f),
}
}
}
/// A parsed and validated `# error:` assertion comment.
#[derive(Debug)]
pub(crate) struct ErrorAssertion<'a> {
/// The diagnostic rule code we expect.
pub(crate) rule: Option<&'a str>,
/// The column we expect the diagnostic range to start at.
pub(crate) column: Option<OneIndexed>,
/// A string we expect to be contained in the diagnostic message.
pub(crate) message_contains: Option<&'a str>,
}
impl<'a> ErrorAssertion<'a> {
fn from_str(source: &'a str) -> Result<Self, ErrorAssertionParseError<'a>> {
ErrorAssertionParser::new(source).parse()
}
}
impl std::fmt::Display for ErrorAssertion<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("error:")?;
if let Some(column) = self.column {
write!(f, " {column}")?;
}
if let Some(rule) = self.rule {
write!(f, " [{rule}]")?;
}
if let Some(message) = self.message_contains {
write!(f, r#" "{message}""#)?;
}
Ok(())
}
}
/// A parser to convert a string into a [`ErrorAssertion`].
#[derive(Debug, Clone)]
struct ErrorAssertionParser<'a> {
cursor: Cursor<'a>,
/// string slice representing all characters *after* the `# error:` prefix.
comment_source: &'a str,
}
impl<'a> ErrorAssertionParser<'a> {
fn new(comment: &'a str) -> Self {
Self {
cursor: Cursor::new(comment),
comment_source: comment,
}
}
/// Consume characters in the assertion comment until we find a non-whitespace character
fn skip_whitespace(&mut self) {
self.cursor.eat_while(char::is_whitespace);
}
/// Attempt to parse the assertion comment into a [`ErrorAssertion`].
fn parse(mut self) -> Result<ErrorAssertion<'a>, ErrorAssertionParseError<'a>> {
let mut column = None;
let mut rule = None;
self.skip_whitespace();
while let Some(character) = self.cursor.bump() {
match character {
// column number
'0'..='9' => {
if column.is_some() {
return Err(ErrorAssertionParseError::MultipleColumnNumbers);
}
if rule.is_some() {
return Err(ErrorAssertionParseError::ColumnNumberAfterRuleCode);
}
let offset = self.cursor.offset() - TextSize::new(1);
self.cursor.eat_while(|c| !c.is_whitespace());
let column_str =
&self.comment_source[TextRange::new(offset, self.cursor.offset())];
column = OneIndexed::from_str(column_str)
.map(Some)
.map_err(|e| ErrorAssertionParseError::BadColumnNumber(column_str, e))?;
}
// rule code
'[' => {
if rule.is_some() {
return Err(ErrorAssertionParseError::MultipleRuleCodes);
}
let offset = self.cursor.offset();
self.cursor.eat_while(|c| c != ']');
if self.cursor.is_eof() {
return Err(ErrorAssertionParseError::UnclosedRuleCode);
}
rule = Some(
self.comment_source[TextRange::new(offset, self.cursor.offset())].trim(),
);
self.cursor.bump();
}
// message text
'"' => {
let comment_source = self.comment_source.trim();
return if comment_source.ends_with('"') {
let rest = &comment_source
[self.cursor.offset().to_usize()..comment_source.len() - 1];
Ok(ErrorAssertion {
rule,
column,
message_contains: Some(rest),
})
} else {
Err(ErrorAssertionParseError::UnclosedMessage)
};
}
// Some other assumptions we make don't hold true if we hit this branch:
'\n' | '\r' => {
unreachable!("Assertion comments should never contain newlines")
}
// something else (bad!)...
unexpected => {
return Err(ErrorAssertionParseError::UnexpectedCharacter {
character: unexpected,
offset: self.cursor.offset().to_usize(),
});
}
}
self.skip_whitespace();
}
if rule.is_some() {
Ok(ErrorAssertion {
rule,
column,
message_contains: None,
})
} else {
Err(ErrorAssertionParseError::NoRuleOrMessage)
}
}
}
/// Enumeration of ways in which parsing an assertion comment can fail.
///
/// The assertion comment could be either a "revealed" assertion or an "error" assertion.
#[derive(Debug, thiserror::Error)]
pub(crate) enum PragmaParseError<'a> {
#[error("Must specify which type should be revealed")]
EmptyRevealTypeAssertion,
#[error("{0}")]
ErrorAssertionParseError(ErrorAssertionParseError<'a>),
}
/// Enumeration of ways in which parsing an *error* assertion comment can fail.
#[derive(Debug, thiserror::Error)]
pub(crate) enum ErrorAssertionParseError<'a> {
#[error("no rule or message text")]
NoRuleOrMessage,
#[error("bad column number `{0}`")]
BadColumnNumber(&'a str, #[source] std::num::ParseIntError),
#[error("column number must precede the rule code")]
ColumnNumberAfterRuleCode,
#[error("multiple column numbers in one assertion")]
MultipleColumnNumbers,
#[error("expected ']' to close rule code")]
UnclosedRuleCode,
#[error("cannot use multiple rule codes in one assertion")]
MultipleRuleCodes,
#[error("expected '\"' to be the final character in an assertion with an error message")]
UnclosedMessage,
#[error(
"unexpected character `{character}` at offset {offset} (relative to the `:` in the assertion comment)"
)]
UnexpectedCharacter { character: char, offset: usize },
}
#[cfg(test)]
mod tests {
use super::*;
use ruff_db::system::DbWithWritableSystem as _;
use ruff_db::{Db as _, files::system_path_to_file};
use ruff_python_trivia::textwrap::dedent;
use ruff_source_file::OneIndexed;
use ty_python_semantic::{
Program, ProgramSettings, PythonPlatform, PythonVersionWithSource, SearchPathSettings,
};
fn get_assertions(source: &str) -> InlineFileAssertions {
let mut db = Db::setup();
let settings = ProgramSettings {
python_version: PythonVersionWithSource::default(),
python_platform: PythonPlatform::default(),
search_paths: SearchPathSettings::new(Vec::new())
.to_search_paths(db.system(), db.vendored())
.unwrap(),
};
Program::init_or_update(&mut db, settings);
db.write_file("/src/test.py", source).unwrap();
let file = system_path_to_file(&db, "/src/test.py").unwrap();
InlineFileAssertions::from_file(&db, file)
}
fn as_vec(assertions: &InlineFileAssertions) -> Vec<LineAssertions> {
assertions.into_iter().collect()
}
#[test]
fn ty_display() {
let assertions = get_assertions(&dedent(
"
reveal_type(1) # revealed: Literal[1]
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(format!("{assert}"), "revealed: Literal[1]");
}
#[test]
fn error() {
let assertions = get_assertions(&dedent(
"
x # error:
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(format!("{assert}"), "error: ");
}
#[test]
fn prior_line() {
let assertions = get_assertions(&dedent(
"
# revealed: Literal[1]
reveal_type(1)
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(format!("{assert}"), "revealed: Literal[1]");
}
#[test]
fn stacked_prior_line() {
let assertions = get_assertions(&dedent(
"
# revealed: Unbound
# error: [unbound-name]
reveal_type(x)
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(3));
let [assert1, assert2] = &line.assertions[..] else {
panic!("expected two assertions");
};
assert_eq!(format!("{assert1}"), "revealed: Unbound");
assert_eq!(format!("{assert2}"), "error: [unbound-name]");
}
#[test]
fn stacked_mixed() {
let assertions = get_assertions(&dedent(
"
# revealed: Unbound
reveal_type(x) # error: [unbound-name]
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
let [assert1, assert2] = &line.assertions[..] else {
panic!("expected two assertions");
};
assert_eq!(format!("{assert1}"), "revealed: Unbound");
assert_eq!(format!("{assert2}"), "error: [unbound-name]");
}
#[test]
fn multiple_lines() {
let assertions = get_assertions(&dedent(
r#"
# error: [invalid-assignment]
x: int = "foo"
y # error: [unbound-name]
"#,
));
let [line1, line2] = &as_vec(&assertions)[..] else {
panic!("expected two lines");
};
assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(2));
assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(3));
let [UnparsedAssertion::Error(error1)] = &line1.assertions[..] else {
panic!("expected one error assertion");
};
let error1 = ErrorAssertion::from_str(error1).unwrap();
assert_eq!(error1.rule, Some("invalid-assignment"));
let [UnparsedAssertion::Error(error2)] = &line2.assertions[..] else {
panic!("expected one error assertion");
};
let error2 = ErrorAssertion::from_str(error2).unwrap();
assert_eq!(error2.rule, Some("unbound-name"));
}
#[test]
fn multiple_lines_mixed_stack() {
let assertions = get_assertions(&dedent(
r#"
# error: [invalid-assignment]
x: int = reveal_type("foo") # revealed: str
y # error: [unbound-name]
"#,
));
let [line1, line2] = &as_vec(&assertions)[..] else {
panic!("expected two lines");
};
assert_eq!(line1.line_number, OneIndexed::from_zero_indexed(2));
assert_eq!(line2.line_number, OneIndexed::from_zero_indexed(3));
let [
UnparsedAssertion::Error(error1),
UnparsedAssertion::Revealed(expected_ty),
] = &line1.assertions[..]
else {
panic!("expected one error assertion and one Revealed assertion");
};
let error1 = ErrorAssertion::from_str(error1).unwrap();
assert_eq!(error1.rule, Some("invalid-assignment"));
assert_eq!(expected_ty.trim(), "str");
let [UnparsedAssertion::Error(error2)] = &line2.assertions[..] else {
panic!("expected one error assertion");
};
let error2 = ErrorAssertion::from_str(error2).unwrap();
assert_eq!(error2.rule, Some("unbound-name"));
}
#[test]
fn error_with_rule() {
let assertions = get_assertions(&dedent(
"
x # error: [unbound-name]
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(format!("{assert}"), "error: [unbound-name]");
}
#[test]
fn error_with_rule_and_column() {
let assertions = get_assertions(&dedent(
"
x # error: 1 [unbound-name]
",
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(1));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(format!("{assert}"), "error: 1 [unbound-name]");
}
#[test]
fn error_with_rule_and_message() {
let assertions = get_assertions(&dedent(
r#"
# error: [unbound-name] "`x` is unbound"
x
"#,
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(
format!("{assert}"),
r#"error: [unbound-name] "`x` is unbound""#
);
}
#[test]
fn error_with_message_and_column() {
let assertions = get_assertions(&dedent(
r#"
# error: 1 "`x` is unbound"
x
"#,
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(format!("{assert}"), r#"error: 1 "`x` is unbound""#);
}
#[test]
fn error_with_rule_and_message_and_column() {
let assertions = get_assertions(&dedent(
r#"
# error: 1 [unbound-name] "`x` is unbound"
x
"#,
));
let [line] = &as_vec(&assertions)[..] else {
panic!("expected one line");
};
assert_eq!(line.line_number, OneIndexed::from_zero_indexed(2));
let [assert] = &line.assertions[..] else {
panic!("expected one assertion");
};
assert_eq!(
format!("{assert}"),
r#"error: 1 [unbound-name] "`x` is unbound""#
);
}
}