ruff/crates/ty_test/src/parser.rs
2025-05-16 13:25:28 +02:00

1938 lines
54 KiB
Rust

use std::{
borrow::Cow,
collections::hash_map::Entry,
fmt::{Formatter, LowerHex, Write},
hash::Hash,
};
use anyhow::bail;
use ruff_db::system::{SystemPath, SystemPathBuf};
use rustc_hash::FxHashMap;
use ruff_index::{IndexVec, newtype_index};
use ruff_python_ast::PySourceType;
use ruff_python_trivia::Cursor;
use ruff_source_file::{LineIndex, LineRanges, OneIndexed};
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustc_stable_hash::{FromStableHash, SipHasher128Hash, StableSipHasher128};
use crate::config::MarkdownTestConfig;
/// Parses the Markdown `source` into a [`MarkdownTestSuite`].
///
/// `title` is used as the title of the implicit root section of the suite.
///
/// # Errors
///
/// Returns whatever error the underlying [`Parser`] reports for malformed input.
pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
    Parser::new(title, source).parse()
}
/// A parsed markdown file containing tests.
///
/// Borrows from the source string and filepath it was created from.
#[derive(Debug)]
pub(crate) struct MarkdownTestSuite<'s> {
/// Header sections, including the implicit root section that the parser creates first.
sections: IndexVec<SectionId, Section<'s>>,
/// Test files embedded within the Markdown file, in the order the parser recorded them.
files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
}
impl<'s> MarkdownTestSuite<'s> {
    /// Returns an iterator over the tests of this suite, starting at the first embedded file.
    pub(crate) fn tests(&self) -> MarkdownTestIterator<'_, 's> {
        let suite = self;
        MarkdownTestIterator {
            current_file_index: 0,
            suite,
        }
    }
}
/// A 128-bit hash value stored as two 64-bit words.
///
/// Obtained by finishing a [`StableSipHasher128`]; used to disambiguate contracted test names.
struct Hash128([u64; 2]);
impl FromStableHash for Hash128 {
    type Hash = SipHasher128Hash;

    /// Wraps the two raw words of the finished hasher state.
    fn from(hash: SipHasher128Hash) -> Hash128 {
        let SipHasher128Hash(words) = hash;
        Hash128(words)
    }
}
impl LowerHex for Hash128 {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let Self(hash) = self;
// Only write the first half for concision
write!(f, "{:x}", hash[0])
}
}
/// A single test inside a [`MarkdownTestSuite`].
///
/// A test is a single header section (or the implicit root section, if there are no Markdown
/// headers in the file), containing one or more embedded Python files as fenced code blocks, and
/// containing no nested header subsections.
#[derive(Debug)]
pub(crate) struct MarkdownTest<'m, 's> {
/// The suite this test belongs to; used to look up ancestor sections when building names.
suite: &'m MarkdownTestSuite<'s>,
/// The section that makes up this test.
section: &'m Section<'s>,
/// The embedded files of `section`, as a contiguous slice of the suite's files.
files: &'m [EmbeddedFile<'s>],
}
impl<'m, 's> MarkdownTest<'m, 's> {
    /// Maximum number of characters kept from each title in a contracted name.
    const MAX_TITLE_LENGTH: usize = 20;
    /// Marker appended to a title that had to be cut short.
    const ELLIPSIS: char = '\u{2026}';

    /// Shortens `title` to at most [`Self::MAX_TITLE_LENGTH`] characters.
    ///
    /// [`Self::ELLIPSIS`] is appended only when characters were actually dropped. The limit is
    /// measured in `char`s (not bytes), so a short non-ASCII title is returned unchanged instead
    /// of receiving a spurious ellipsis.
    fn contracted_title(title: &str) -> String {
        // `nth(MAX)` yields the byte offset of the first character beyond the limit, if any.
        match title.char_indices().nth(Self::MAX_TITLE_LENGTH) {
            Some((cut, _)) => format!("{}{}", &title[..cut], Self::ELLIPSIS),
            None => title.to_string(),
        }
    }

    /// Joins the titles of all ancestor sections and this test's section with `" - "`,
    /// outermost section first.
    ///
    /// When `contracted` is `true`, each title is shortened with [`Self::contracted_title`] and
    /// a hexadecimal hash of the full (uncontracted) name is appended in parentheses.
    fn joined_name(&self, contracted: bool) -> String {
        // Collect innermost-first while walking up the parent chain, then flip the order once.
        let mut name_fragments = vec![self.section.title];
        let mut parent_id = self.section.parent_id;
        while let Some(next_id) = parent_id {
            let parent = &self.suite.sections[next_id];
            name_fragments.push(parent.title);
            parent_id = parent.parent_id;
        }
        name_fragments.reverse();

        let full_name = name_fragments.join(" - ");
        if !contracted {
            return full_name;
        }

        let mut contracted_name = name_fragments
            .iter()
            .map(|fragment| Self::contracted_title(fragment))
            .collect::<Vec<_>>()
            .join(" - ");

        // The hash is computed over the full name, so it does not depend on the contraction.
        let mut hasher = StableSipHasher128::new();
        full_name.hash(&mut hasher);
        // Writing into a `String` cannot fail, so the result is deliberately ignored.
        let _ = write!(contracted_name, " ({:x})", hasher.finish::<Hash128>());
        contracted_name
    }

    /// Returns the full name of the test, with no title shortened and no hash suffix.
    pub(crate) fn uncontracted_name(&self) -> String {
        self.joined_name(false)
    }

    /// Returns the contracted name of the test, including the hash suffix.
    pub(crate) fn name(&self) -> String {
        self.joined_name(true)
    }

    /// Iterates over the files embedded in this test.
    pub(crate) fn files(&self) -> impl Iterator<Item = &'m EmbeddedFile<'s>> {
        self.files.iter()
    }

    /// Returns the configuration stored on this test's section.
    pub(crate) fn configuration(&self) -> &MarkdownTestConfig {
        &self.section.config
    }

    /// Returns whether diagnostic snapshotting is enabled on this test's section.
    pub(super) fn should_snapshot_diagnostics(&self) -> bool {
        self.section.snapshot_diagnostics
    }
}
/// Iterator yielding all [`MarkdownTest`]s in a [`MarkdownTestSuite`].
#[derive(Debug)]
pub(crate) struct MarkdownTestIterator<'m, 's> {
/// The suite being iterated.
suite: &'m MarkdownTestSuite<'s>,
/// Index into the suite's files of the first file of the next test to yield.
current_file_index: usize,
}
impl<'m, 's> Iterator for MarkdownTestIterator<'m, 's> {
    type Item = MarkdownTest<'m, 's>;

    /// Yields the next test: the run of consecutive files that share the section of the file at
    /// the current position. Returns `None` once all files have been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        let suite = self.suite;
        let start = self.current_file_index;
        let section_id = suite.files.get(start.into())?.section;

        // The file at `start` trivially belongs to the section; extend the run from there.
        let mut end = start + 1;
        while suite
            .files
            .get(end.into())
            .is_some_and(|file| file.section == section_id)
        {
            end += 1;
        }

        let files =
            &suite.files[EmbeddedFileId::from_usize(start)..EmbeddedFileId::from_usize(end)];
        self.current_file_index = end;

        Some(MarkdownTest {
            suite,
            section: &suite.sections[section_id],
            files,
        })
    }
}
/// Index of a [`Section`] within the `sections` of a [`MarkdownTestSuite`].
#[newtype_index]
struct SectionId;
/// A single header section of a [`MarkdownTestSuite`], or the implicit root "section".
///
/// A header section is the part of a Markdown file beginning with a `#`-prefixed header line, and
/// extending until the next header line at the same or higher outline level (that is, with the
/// same number or fewer `#` characters).
///
/// A header section may either contain one or more embedded Python files (making it a
/// [`MarkdownTest`]), or it may contain nested sections (headers with more `#` characters), but
/// not both.
#[derive(Debug)]
struct Section<'s> {
/// Trimmed header text, or the suite title for the implicit root section.
title: &'s str,
/// Number of `#` characters of the header; `0` for the implicit root section.
level: u8,
/// The enclosing section; `None` for the implicit root section.
parent_id: Option<SectionId>,
/// Configuration of the section: cloned from the parent when the section is created and
/// replaced if the section contains a TOML configuration block.
config: MarkdownTestConfig,
/// Whether diagnostics should be snapshotted; copied from the parent when the section is
/// created and enabled by a `snapshot-diagnostics` HTML comment.
snapshot_diagnostics: bool,
}
/// Index of an [`EmbeddedFile`] within the `files` of a [`MarkdownTestSuite`].
#[newtype_index]
struct EmbeddedFileId;
/// Holds information about the start and the end of a code block in a Markdown file.
///
/// The first field is the offset of the opening triple-backtick of the code block, and the
/// second field is the offset of the (start of the) closing triple-backtick.
#[derive(Debug, Clone)]
pub(crate) struct BacktickOffsets(TextSize, TextSize);
/// Holds information about the position and length of all code blocks that are part of
/// a single embedded file in a Markdown file. This is used to reconstruct absolute line
/// numbers (in the Markdown file) from relative line numbers (in the embedded file).
///
/// If we have a Markdown section with multiple code blocks like this:
///
/// ```text
/// 01   # Test
/// 02
/// 03   Part 1:
/// 04
/// 05   ```py
/// 06   a = 1    # Relative line number: 1
/// 07   b = 2    # Relative line number: 2
/// 08   ```
/// 09
/// 10   Part 2:
/// 11
/// 12   ```py
/// 13   c = 3    # Relative line number: 3
/// 14   ```
/// ```
///
/// We want to reconstruct the absolute line number (left) from relative
/// line numbers. The information we have is the start line and the line
/// count of each code block:
///
/// - Block 1: (start = 5, count = 2)
/// - Block 2: (start = 12, count = 1)
///
/// For example, if we see a relative line number of 3, we see that it is
/// larger than the line count of the first block, so we subtract the line
/// count of the first block, and then add the new relative line number (1)
/// to the absolute start line of the second block (12), resulting in an
/// absolute line number of 13.
pub(crate) struct EmbeddedFileSourceMap {
/// One `(start line, code line count)` pair per code block: the one-based line of the opening
/// backticks and the number of lines strictly between the opening and closing backticks.
start_line_and_line_count: Vec<(usize, usize)>,
}
impl EmbeddedFileSourceMap {
pub(crate) fn new(
md_index: &LineIndex,
dimensions: impl IntoIterator<Item = BacktickOffsets>,
) -> EmbeddedFileSourceMap {
EmbeddedFileSourceMap {
start_line_and_line_count: dimensions
.into_iter()
.map(|d| {
let start_line = md_index.line_index(d.0).get();
let end_line = md_index.line_index(d.1).get();
let code_line_count = (end_line - start_line) - 1;
(start_line, code_line_count)
})
.collect(),
}
}
pub(crate) fn to_absolute_line_number(&self, relative_line_number: OneIndexed) -> OneIndexed {
let mut absolute_line_number = 0;
let mut relative_line_number = relative_line_number.get();
for (start_line, line_count) in &self.start_line_and_line_count {
if relative_line_number > *line_count {
relative_line_number -= *line_count;
} else {
absolute_line_number = start_line + relative_line_number;
break;
}
}
OneIndexed::new(absolute_line_number).expect("Relative line number out of bounds")
}
}
/// The path of an [`EmbeddedFile`], relative to the project root unless it starts with `/`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) enum EmbeddedFilePath<'s> {
/// No path was given for the code block; a fixed `mdtest_snippet.*` name is derived from the
/// source type.
Autogenerated(PySourceType),
/// The path was given explicitly in the Markdown source.
Explicit(&'s str),
}
impl EmbeddedFilePath<'_> {
    /// Returns the path as a string: the explicit path, or the fixed autogenerated file name for
    /// the source type.
    pub(crate) fn as_str(&self) -> &str {
        match self {
            Self::Explicit(path) => path,
            Self::Autogenerated(source_type) => match source_type {
                PySourceType::Python => "mdtest_snippet.py",
                PySourceType::Stub => "mdtest_snippet.pyi",
                PySourceType::Ipynb => "mdtest_snippet.ipynb",
            },
        }
    }

    /// Returns `true` if the path was spelled out in the Markdown source.
    fn is_explicit(&self) -> bool {
        match self {
            Self::Explicit(_) => true,
            Self::Autogenerated(_) => false,
        }
    }

    /// Returns `false` if `path` equals the autogenerated name of a Python file or stub, which
    /// must not be used as an explicit path.
    fn is_allowed_explicit_path(path: &str) -> bool {
        let is_reserved = [PySourceType::Python, PySourceType::Stub]
            .into_iter()
            .any(|source_type| EmbeddedFilePath::Autogenerated(source_type).as_str() == path);
        !is_reserved
    }
}
/// A single file embedded in a [`Section`] as a fenced code block.
///
/// Currently must be a Python file (`py` language), a type stub (`pyi`) or a [typeshed `VERSIONS`]
/// file.
///
/// TOML configuration blocks are also supported, but are not stored as `EmbeddedFile`s. In the
/// future we plan to support `pth` files as well.
///
/// A Python embedded file makes its containing [`Section`] into a [`MarkdownTest`], and will be
/// type-checked and searched for inline-comment assertions to match against the diagnostics from
/// type checking.
///
/// [typeshed `VERSIONS`]: https://github.com/python/typeshed/blob/c546278aae47de0b2b664973da4edb613400f6ce/stdlib/VERSIONS#L1-L18
#[derive(Debug)]
pub(crate) struct EmbeddedFile<'s> {
/// The section the file is embedded in.
section: SectionId,
/// Explicit or autogenerated path of the file.
path: EmbeddedFilePath<'s>,
/// Language specifier of the code block that created this file.
pub(crate) lang: &'s str,
/// Contents of the file. Borrowed from the Markdown source until a further snippet is
/// appended, at which point it becomes an owned string.
pub(crate) code: Cow<'s, str>,
/// Offsets of the code blocks that make up this file: the creating block, plus every
/// non-empty block appended afterwards.
pub(crate) backtick_offsets: Vec<BacktickOffsets>,
}
impl EmbeddedFile<'_> {
    /// Appends `new_code` as a further snippet of this file, separated by a newline.
    ///
    /// Empty snippets are ignored entirely (no offsets recorded, no blank line added).
    fn append_code(&mut self, backtick_offsets: BacktickOffsets, new_code: &str) {
        if !new_code.is_empty() {
            self.backtick_offsets.push(backtick_offsets);
            let merged = self.code.to_mut();
            merged.push('\n');
            merged.push_str(new_code);
        }
    }

    /// Returns the path of the file as written (or autogenerated), without resolving it.
    pub(crate) fn relative_path(&self) -> &str {
        self.path.as_str()
    }

    /// Returns the full path using unix file-path convention.
    pub(crate) fn full_path(&self, project_root: &SystemPath) -> SystemPathBuf {
        // `SystemPath::absolute` is platform dependent, so absoluteness is decided here purely
        // by the unix convention of a leading slash.
        let path = self.relative_path();
        let is_absolute = path.starts_with('/');
        if !is_absolute {
            return project_root.join(path);
        }
        SystemPathBuf::from(path)
    }
}
/// Stack of the currently open sections, innermost last.
///
/// The implicit root section is pushed on construction and is expected to never be popped.
#[derive(Debug)]
struct SectionStack(Vec<SectionId>);
impl SectionStack {
    /// Creates a stack containing only the implicit root section.
    fn new(root_section_id: SectionId) -> Self {
        Self(vec![root_section_id])
    }

    /// Opens `section_id` as the new innermost section.
    fn push(&mut self, section_id: SectionId) {
        self.0.push(section_id);
    }

    /// Closes and returns the innermost section.
    ///
    /// In debug builds this asserts that the implicit root section stays on the stack.
    fn pop(&mut self) -> Option<SectionId> {
        let popped = self.0.pop();
        debug_assert_ne!(popped, None, "Should never pop the implicit root section");
        debug_assert!(
            !self.0.is_empty(),
            "Should never pop the implicit root section"
        );
        popped
    }

    /// Returns the innermost open section.
    ///
    /// This only reads the stack, so a shared borrow is sufficient.
    ///
    /// # Panics
    ///
    /// Panics if the stack is empty, which would mean the implicit root section was popped.
    fn top(&self) -> SectionId {
        *self
            .0
            .last()
            .expect("Should never pop the implicit root section")
    }
}
/// Parses the source of a Markdown file into a [`MarkdownTestSuite`].
#[derive(Debug)]
struct Parser<'s> {
/// [`Section`]s of the final [`MarkdownTestSuite`].
sections: IndexVec<SectionId, Section<'s>>,
/// [`EmbeddedFile`]s of the final [`MarkdownTestSuite`].
files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
/// The unparsed remainder of the Markdown source.
cursor: Cursor<'s>,
/// Number of consecutive empty lines seen directly before the current line.
preceding_blank_lines: usize,
/// Explicitly specified path for the upcoming code block.
explicit_path: Option<&'s str>,
/// The complete Markdown source being parsed.
source: &'s str,
/// Length of `source`; together with the cursor's remaining length it gives the current offset.
source_len: TextSize,
/// Stack of ancestor sections.
stack: SectionStack,
/// Names of embedded files in current active section.
current_section_files: FxHashMap<EmbeddedFilePath<'s>, EmbeddedFileId>,
/// Whether or not the current section has a config block.
current_section_has_config: bool,
}
impl<'s> Parser<'s> {
/// Creates a parser positioned at the start of `source`.
///
/// The implicit root section (level 0, no parent, default configuration) is created here with
/// `title` as its title and becomes the bottom of the section stack.
fn new(title: &'s str, source: &'s str) -> Self {
    let root = Section {
        title,
        level: 0,
        parent_id: None,
        config: MarkdownTestConfig::default(),
        snapshot_diagnostics: false,
    };
    let mut sections = IndexVec::default();
    let root_section_id = sections.push(root);

    Self {
        sections,
        files: IndexVec::default(),
        cursor: Cursor::new(source),
        preceding_blank_lines: 0,
        explicit_path: None,
        source,
        source_len: source.text_len(),
        stack: SectionStack::new(root_section_id),
        current_section_files: FxHashMap::default(),
        current_section_has_config: false,
    }
}
/// Runs the parser to completion and converts it into the finished suite.
fn parse(mut self) -> anyhow::Result<MarkdownTestSuite<'s>> {
    match self.parse_impl() {
        Ok(()) => Ok(self.finish()),
        Err(error) => Err(error),
    }
}
fn finish(mut self) -> MarkdownTestSuite<'s> {
self.sections.shrink_to_fit();
self.files.shrink_to_fit();
MarkdownTestSuite {
sections: self.sections,
files: self.files,
}
}
/// Advances the cursor over whitespace, stopping at a newline or the first other character.
fn skip_whitespace(&mut self) {
    self.cursor.eat_while(|c| c != '\n' && c.is_whitespace());
}
/// Moves the cursor just past the next newline.
///
/// Returns `false`, leaving the cursor untouched, if the remaining input has no newline.
fn skip_to_beginning_of_next_line(&mut self) -> bool {
    let Some(newline) = memchr::memchr(b'\n', self.cursor.as_bytes()) else {
        return false;
    };
    self.cursor.skip_bytes(newline + 1);
    true
}
/// Advances the cursor until `end_predicate` accepts the next character and returns the text
/// consumed up to (but not including) that character.
///
/// Returns `None` if the end of the input is reached before the predicate matches; the cursor
/// is then left at the end of the input.
fn consume_until(&mut self, mut end_predicate: impl FnMut(char) -> bool) -> Option<&'s str> {
    let start = self.offset().to_usize();
    loop {
        if self.cursor.is_eof() {
            return None;
        }
        if end_predicate(self.cursor.first()) {
            let end = self.offset().to_usize();
            return Some(&self.source[start..end]);
        }
        self.cursor.bump();
    }
}
/// Scans the Markdown source line by line and records sections, configuration and embedded
/// files on the parser.
///
/// Each loop iteration starts at the beginning of a line, consumes its first character and
/// dispatches on it: HTML comments, `#` headers, backtick-introduced code blocks or explicit
/// path lines, blank lines, and everything else.
///
/// # Errors
///
/// Fails on unknown or unterminated HTML comments, on code blocks that are unterminated,
/// indented, carry trailing metadata or lack a preceding blank line, and on any error reported
/// while processing a header, code block or section configuration.
fn parse_impl(&mut self) -> anyhow::Result<()> {
// The only HTML comment that carries meaning for the parser.
const SECTION_CONFIG_SNAPSHOT: &str = "snapshot-diagnostics";
// HTML comments that are accepted and ignored.
const HTML_COMMENT_ALLOWLIST: &[&str] = &["blacken-docs:on", "blacken-docs:off"];
const CODE_BLOCK_END: &[u8] = b"```";
const HTML_COMMENT_END: &[u8] = b"-->";
while let Some(first) = self.cursor.bump() {
match first {
// An HTML comment (`<!-- ... -->`) at the start of a line.
'<' if self.cursor.eat_char3('!', '-', '-') => {
if let Some(position) =
memchr::memmem::find(self.cursor.as_bytes(), HTML_COMMENT_END)
{
let html_comment = self.cursor.as_str()[..position].trim();
if html_comment == SECTION_CONFIG_SNAPSHOT {
self.process_snapshot_diagnostics()?;
} else if !HTML_COMMENT_ALLOWLIST.contains(&html_comment) {
bail!(
"Unknown HTML comment `{}` -- possibly a `snapshot-diagnostics` typo? \
(Add to `HTML_COMMENT_ALLOWLIST` if this is a false positive)",
html_comment
);
}
// Continue scanning after the comment terminator.
self.cursor.skip_bytes(position + HTML_COMMENT_END.len());
} else {
bail!("Unterminated HTML comment.");
}
}
// A header line.
'#' => {
self.explicit_path = None;
self.preceding_blank_lines = 0;
// Determine header level (number of '#' characters)
let mut header_level = 1;
while self.cursor.eat_char('#') {
header_level += 1;
}
// Parse header title
// A header whose title is empty after trimming does not open a section.
if let Some(title) = self.consume_until(|c| c == '\n') {
let title = title.trim();
if !title.is_empty() {
self.process_header(header_level, title)?;
}
}
}
// Either the opening fence of a code block or a line naming an explicit path.
'`' => {
if self.cursor.eat_char2('`', '`') {
// We saw the triple-backtick beginning of a code block.
let backtick_offset_start = self.offset() - "```".text_len();
// A code block directly following an explicit path line needs no blank line.
if self.preceding_blank_lines < 1 && self.explicit_path.is_none() {
bail!(
"Code blocks must start on a new line and be preceded by at least one blank line."
);
}
self.skip_whitespace();
// Parse the code block language specifier
let lang = self
.consume_until(|c| matches!(c, ' ' | '\n'))
.unwrap_or_default();
self.skip_whitespace();
if !self.cursor.eat_char('\n') {
bail!(
"Trailing code-block metadata is not supported. Only the code block language can be specified."
);
}
// The code extends up to the next triple-backtick in the remaining input.
if let Some(position) =
memchr::memmem::find(self.cursor.as_bytes(), CODE_BLOCK_END)
{
let mut code = &self.cursor.as_str()[..position];
self.cursor.skip_bytes(position + CODE_BLOCK_END.len());
// Drop the newline that precedes the closing fence.
if code.ends_with('\n') {
code = &code[..code.len() - '\n'.len_utf8()];
}
let backtick_offset_end = self.offset() - "```".text_len();
self.process_code_block(
lang,
code,
BacktickOffsets(backtick_offset_start, backtick_offset_end),
)?;
} else {
let code_block_start = self.cursor.token_len();
let line = self.source.count_lines(TextRange::up_to(code_block_start));
bail!("Unterminated code block at line {line}.");
}
// An explicit path applies to a single code block only.
self.explicit_path = None;
} else if self.preceding_blank_lines > 0 {
// This could be a line that specifies an explicit path for a Markdown code block (`module.py`:)
self.explicit_path = None;
if let Some(path) = self.consume_until(|c| matches!(c, '`' | '\n')) {
if self.cursor.eat_char('`') {
self.skip_whitespace();
if self.cursor.eat_char(':') {
self.explicit_path = Some(path);
}
}
}
}
self.preceding_blank_lines = 0;
}
// A blank line: the cursor is already at the start of the next line, so skip the
// end-of-iteration line skip below.
'\n' => {
self.preceding_blank_lines += 1;
continue;
}
// Any other line; only checked for a code fence hidden behind leading whitespace.
c => {
self.preceding_blank_lines = 0;
self.explicit_path = None;
if c.is_whitespace() {
self.skip_whitespace();
if self.cursor.eat_char('`')
&& self.cursor.eat_char('`')
&& self.cursor.eat_char('`')
{
bail!("Indented code blocks are not supported.");
}
}
}
}
// Ignore the rest of the current line; stop when there are no further lines.
if !self.skip_to_beginning_of_next_line() {
break;
}
}
Ok(())
}
/// Opens a new section for a header with the given level and title.
///
/// Sections at the same or a deeper level are closed first. The new section starts out with
/// its parent's configuration and snapshot setting.
///
/// # Errors
///
/// Fails if `header_level` does not fit the section's level type, or if the section that
/// would become the parent already contains embedded files.
fn process_header(&mut self, header_level: usize, title: &'s str) -> anyhow::Result<()> {
    self.pop_sections_to_level(header_level);
    let parent = self.stack.top();
    let level = header_level.try_into()?;

    if !self.current_section_files.is_empty() {
        bail!(
            "Header '{}' not valid inside a test case; parent '{}' has code files.",
            title,
            self.sections[parent].title,
        );
    }

    let parent_section = &self.sections[parent];
    let section = Section {
        title,
        level,
        parent_id: Some(parent),
        config: parent_section.config.clone(),
        snapshot_diagnostics: parent_section.snapshot_diagnostics,
    };
    let section_id = self.sections.push(section);
    self.stack.push(section_id);

    // Per-section bookkeeping starts afresh for the new section.
    self.current_section_files.clear();
    self.current_section_has_config = false;
    Ok(())
}
/// Records a fenced code block of the current section.
///
/// `toml` blocks are treated as section configuration and `ignore` blocks are skipped. Any
/// other block becomes an embedded file, or is merged into the existing autogenerated file of
/// the same kind in this section.
///
/// # Errors
///
/// Fails if an explicit path's extension does not match `lang`, if an explicit path uses a
/// reserved autogenerated name, if no file name can be derived from `lang`, if an explicit
/// path is duplicated, or if merged snippets would be combined with other files.
fn process_code_block(
    &mut self,
    lang: &'s str,
    code: &'s str,
    backtick_offsets: BacktickOffsets,
) -> anyhow::Result<()> {
    // The implicit root section always remains on the stack.
    let section = self.stack.top();
    let test_name = self.sections[section].title;

    match lang {
        "toml" => return self.process_config_block(code),
        "ignore" => return Ok(()),
        _ => {}
    }

    let path = if let Some(explicit_path) = self.explicit_path {
        let expected_extension = match lang {
            "python" => "py",
            other => other,
        };
        // Paths without an extension are compatible with every language.
        let extension_matches = SystemPath::new(explicit_path)
            .extension()
            .is_none_or(|extension| extension.eq_ignore_ascii_case(expected_extension));
        if !(expected_extension.is_empty() || lang == "text" || extension_matches) {
            let backtick_start = self.line_index(backtick_offsets.0);
            bail!(
                "File extension of test file path `{explicit_path}` in test `{test_name}` does not match language specified `{lang}` of code block at line `{backtick_start}`"
            );
        }
        if !EmbeddedFilePath::is_allowed_explicit_path(explicit_path) {
            bail!(
                "The file name `{explicit_path}` in test `{test_name}` must not be used explicitly.",
            );
        }
        EmbeddedFilePath::Explicit(explicit_path)
    } else {
        match lang {
            "py" | "python" => EmbeddedFilePath::Autogenerated(PySourceType::Python),
            "pyi" => EmbeddedFilePath::Autogenerated(PySourceType::Stub),
            "" => {
                bail!(
                    "Cannot auto-generate file name for code block with empty language specifier in test `{test_name}`"
                );
            }
            _ => {
                bail!(
                    "Cannot auto-generate file name for code block with language `{}` in test `{test_name}`",
                    lang
                );
            }
        }
    };

    // Both facts describe the section as it was before this block is added.
    let has_merged_snippets = self.current_section_has_merged_snippets();
    let has_explicit_file_paths = self.current_section_has_explicit_file_paths();

    match self.current_section_files.entry(path.clone()) {
        Entry::Occupied(existing) => {
            if path.is_explicit() {
                bail!(
                    "Test `{test_name}` has duplicate files named `{}`.",
                    path.as_str(),
                );
            }
            if has_explicit_file_paths {
                bail!(
                    "Merged snippets in test `{test_name}` are not allowed in the presence of other files."
                );
            }
            let file_id = *existing.get();
            self.files[file_id].append_code(backtick_offsets, code);
        }
        Entry::Vacant(slot) => {
            if has_merged_snippets {
                bail!(
                    "Merged snippets in test `{test_name}` are not allowed in the presence of other files."
                );
            }
            let file_id = self.files.push(EmbeddedFile {
                section,
                path,
                lang,
                code: Cow::Borrowed(code),
                backtick_offsets: vec![backtick_offsets],
            });
            slot.insert(file_id);
        }
    }
    Ok(())
}
/// Returns `true` if any file recorded for the current section has an explicit path.
fn current_section_has_explicit_file_paths(&self) -> bool {
    self.current_section_files
        .keys()
        .any(|path| path.is_explicit())
}
/// Returns `true` if any file of the current section is made up of more than one code block.
fn current_section_has_merged_snippets(&self) -> bool {
    self.current_section_files
        .values()
        .map(|&id| &self.files[id])
        .any(|file| file.backtick_offsets.len() > 1)
}
/// Parses `code` as TOML configuration and stores it on the current section.
///
/// # Errors
///
/// Fails if the current section already has a configuration block, or if `code` cannot be
/// parsed into a [`MarkdownTestConfig`].
fn process_config_block(&mut self, code: &str) -> anyhow::Result<()> {
    if self.current_section_has_config {
        bail!("Multiple TOML configuration blocks in the same section are not allowed.");
    }
    let config = MarkdownTestConfig::from_str(code)?;
    let section_id = self.stack.top();
    self.sections[section_id].config = config;
    self.current_section_has_config = true;
    Ok(())
}
/// Enables diagnostic snapshotting for the current section.
///
/// # Errors
///
/// Fails if the section already has a configuration block or embedded files, or if
/// snapshotting is already enabled for it.
fn process_snapshot_diagnostics(&mut self) -> anyhow::Result<()> {
    if self.current_section_has_config {
        bail!(
            "Section config to enable snapshotting diagnostics must come before everything else (including TOML configuration blocks).",
        );
    }
    if !self.current_section_files.is_empty() {
        bail!(
            "Section config to enable snapshotting diagnostics must come before everything else (including embedded files).",
        );
    }

    let section_id = self.stack.top();
    let section = &mut self.sections[section_id];
    if section.snapshot_diagnostics {
        bail!("Section config to enable snapshotting diagnostics should appear at most once.",);
    }
    section.snapshot_diagnostics = true;
    Ok(())
}
/// Closes every open section whose level is at least `level`, so that a header of that level
/// can be opened next.
fn pop_sections_to_level(&mut self, level: usize) {
    loop {
        let top_level: usize = self.sections[self.stack.top()].level.into();
        if top_level < level {
            break;
        }
        self.stack.pop();
        // A section with files can never have gained a child section (that is rejected when the
        // child header is processed), so no remaining ancestor has files to keep track of.
        self.current_section_files.clear();
    }
}
/// Returns the cursor's current offset from the start of the source, computed as the total
/// source length minus the length of the text the cursor has yet to consume.
fn offset(&self) -> TextSize {
    let remaining = self.cursor.text_len();
    self.source_len - remaining
}
/// Returns the line count of the source text that precedes the offset `char_index`.
fn line_index(&self, char_index: TextSize) -> u32 {
    let preceding = TextRange::up_to(char_index);
    self.source.count_lines(preceding)
}
}
#[cfg(test)]
mod tests {
use ruff_python_ast::PySourceType;
use ruff_python_trivia::textwrap::dedent;
use insta::assert_snapshot;
use crate::parser::EmbeddedFilePath;
#[test]
fn empty() {
let mf = super::parse("file.md", "").unwrap();
assert!(mf.tests().next().is_none());
}
#[test]
fn single_file_test() {
let source = dedent(
"
```py
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
assert_snapshot!(test.name(), @"file.md (a8decfe8bd23e259)");
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.lang, "py");
assert_eq!(file.code, "x = 1");
}
#[test]
fn no_new_line_at_eof() {
let source = dedent(
"
```py
x = 1
```",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
assert_snapshot!(test.name(), @"file.md (a8decfe8bd23e259)");
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.lang, "py");
assert_eq!(file.code, "x = 1");
}
#[test]
fn multiple_tests() {
let source = dedent(
"
# One
```py
x = 1
```
# Two
```py
y = 2
```
# Three
`mod_a.pyi`:
```pyi
a: int
```
`mod_b.pyi`:
```pyi
b: str
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test1, test2, test3] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected three tests");
};
assert_snapshot!(test1.name(), @"file.md - One (9f620a533a21278)");
assert_snapshot!(test2.name(), @"file.md - Two (1b4d4ef5a2cebbdc)");
assert_snapshot!(test3.name(), @"file.md - Three (26479e23633dda57)");
let [file] = test1.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.lang, "py");
assert_eq!(file.code, "x = 1");
let [file] = test2.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.lang, "py");
assert_eq!(file.code, "y = 2");
let [file_1, file_2] = test3.files().collect::<Vec<_>>()[..] else {
panic!("expected two files");
};
assert_eq!(file_1.relative_path(), "mod_a.pyi");
assert_eq!(file_1.lang, "pyi");
assert_eq!(file_1.code, "a: int");
assert_eq!(file_2.relative_path(), "mod_b.pyi");
assert_eq!(file_2.lang, "pyi");
assert_eq!(file_2.code, "b: str");
}
#[test]
fn multiple_file_tests() {
let source = dedent(
"
# One
`main.py`:
```py
from foo import y
```
`foo.py`:
```py
y = 2
```
# Two
```py
y = 2
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test1, test2] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected two tests");
};
assert_snapshot!(test1.name(), @"file.md - One (9f620a533a21278)");
assert_snapshot!(test2.name(), @"file.md - Two (1b4d4ef5a2cebbdc)");
let [main, foo] = test1.files().collect::<Vec<_>>()[..] else {
panic!("expected two files");
};
assert_eq!(main.relative_path(), "main.py");
assert_eq!(main.lang, "py");
assert_eq!(main.code, "from foo import y");
assert_eq!(foo.relative_path(), "foo.py");
assert_eq!(foo.lang, "py");
assert_eq!(foo.code, "y = 2");
let [file] = test2.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.lang, "py");
assert_eq!(file.code, "y = 2");
}
#[test]
fn merged_snippets() {
let source = dedent(
"
# One
This is the first part of the embedded file:
```py
x = 1
```
And this is the second part:
```py
y = 2
```
And this is the third part:
```py
z = 3
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.lang, "py");
assert_eq!(file.code, "x = 1\ny = 2\nz = 3");
}
#[test]
fn no_merged_snippets_for_explicit_paths() {
let source = dedent(
"
# One
`foo.py`:
```py
x = 1
```
`foo.py`:
```py
y = 2
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Test `One` has duplicate files named `foo.py`."
);
}
#[test]
fn disallow_merged_snippets_in_presence_of_explicit_paths() {
for source in [
// Merged snippets first
"
# One
```py
x = 1
```
```py
y = 2
```
`foo.py`:
```py
print('hello')
```
",
// Explicit path first
"
# One
`foo.py`:
```py
print('hello')
```
```py
x = 1
```
```py
y = 2
```
",
] {
let err = super::parse("file.md", &dedent(source)).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Merged snippets in test `One` are not allowed in the presence of other files."
);
}
}
#[test]
fn disallow_pyi_snippets_in_presence_of_merged_py_snippets() {
let source = dedent(
"
# One
```py
x = 1
```
```py
y = 2
```
```pyi
x: int
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Merged snippets in test `One` are not allowed in the presence of other files."
);
}
#[test]
fn custom_file_path() {
let source = dedent(
"
`foo.py`:
```py
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(file.relative_path(), "foo.py");
assert_eq!(file.lang, "py");
assert_eq!(file.code, "x = 1");
}
#[test]
fn multi_line_file() {
let source = dedent(
"
```py
x = 1
y = 2
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(file.code, "x = 1\ny = 2");
}
#[test]
fn empty_file() {
let source = dedent(
"
```py
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(file.code, "");
}
#[test]
fn no_lang() {
let source = dedent(
"
# No language specifier
```
x = 10
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Cannot auto-generate file name for code block with empty language specifier in test `No language specifier`"
);
}
#[test]
fn cannot_generate_name_for_lang() {
let source = dedent(
"
# JSON test?
```json
{}
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Cannot auto-generate file name for code block with language `json` in test `JSON test?`"
);
}
#[test]
fn mismatching_lang() {
let source = dedent(
"
# Accidental stub
`a.py`:
```pyi
x = 1
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"File extension of test file path `a.py` in test `Accidental stub` does not match language specified `pyi` of code block at line `5`"
);
}
#[test]
fn files_with_no_extension_can_have_any_lang() {
let source = dedent(
"
`lorem`:
```foo
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(file.relative_path(), "lorem");
assert_eq!(file.code, "x = 1");
}
#[test]
fn files_with_lang_text_can_have_any_paths() {
let source = dedent(
"
`lorem.yaml`:
```text
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(file.relative_path(), "lorem.yaml");
assert_eq!(file.code, "x = 1");
}
#[test]
fn unterminated_code_block_1() {
let source = dedent(
"
```
x = 1
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(err.to_string(), "Unterminated code block at line 2.");
}
#[test]
fn unterminated_code_block_2() {
let source = dedent(
"
## A well-fenced block
```py
y = 2
```
## A not-so-well-fenced block
```py
x = 1
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(err.to_string(), "Unterminated code block at line 10.");
}
#[test]
fn header_start_at_beginning_of_line() {
let source = dedent(
"
# A test
# not a header
```py
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
assert_snapshot!(test.name(), @"file.md - A test (1b4e27e6123dc8e7)");
}
#[test]
fn code_blocks_must_not_be_indented() {
let source = dedent(
"
# A test?
```py
x = 1
```
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(err.to_string(), "Indented code blocks are not supported.");
}
#[test]
fn no_header_inside_test() {
let source = dedent(
"
# One
```py
x = 1
```
## Two
",
);
let err = super::parse("file.md", &source).expect_err("Should fail to parse");
assert_eq!(
err.to_string(),
"Header 'Two' not valid inside a test case; parent 'One' has code files."
);
}
#[test]
fn line_break_in_header_1() {
let source = dedent(
"
#
Foo
```py
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(test.section.title, "file.md");
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.code, "x = 1");
}
#[test]
fn line_break_in_header_2() {
let source = dedent(
"
# Foo
##
Lorem
```py
x = 1
```
",
);
let mf = super::parse("file.md", &source).unwrap();
let [test] = &mf.tests().collect::<Vec<_>>()[..] else {
panic!("expected one test");
};
let [file] = test.files().collect::<Vec<_>>()[..] else {
panic!("expected one file");
};
assert_eq!(test.section.title, "Foo");
assert_eq!(
file.path,
EmbeddedFilePath::Autogenerated(PySourceType::Python)
);
assert_eq!(file.code, "x = 1");
}
/// Two embedded files that both claim the path `foo.py` within one test are
/// expected to produce a duplicate-file diagnostic.
#[test]
fn no_duplicate_name_files_in_test() {
    let markdown = dedent(
        "
`foo.py`:
```py
x = 1
```
`foo.py`:
```py
y = 2
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Test `file.md` has duplicate files named `foo.py`."
    );
}
/// Explicitly naming a file `mdtest_snippet.py` is expected to be rejected with a
/// diagnostic that names the file and the enclosing test.
#[test]
fn no_usage_of_autogenerated_name() {
    let markdown = dedent(
        "
# Name clash
`mdtest_snippet.py`:
```py
x = 1
```
```py
y = 2
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "The file name `mdtest_snippet.py` in test `Name clash` must not be used explicitly."
    );
}
/// A backtick-quoted path followed by a colon on the line before a code block is
/// expected to become that file's relative path.
#[test]
fn separate_path() {
    let markdown = dedent(
        "
`foo.py`:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let [test] = tests.as_slice() else {
        panic!("expected one test");
    };

    let files: Vec<_> = test.files().collect();
    let [file] = files[..] else {
        panic!("expected one file");
    };

    assert_eq!(file.relative_path(), "foo.py");
    assert_eq!(file.code, "x = 1");
}
/// Whitespace between the closing backtick and the colon is expected to be
/// tolerated: the file's relative path is still `foo.py`.
#[test]
fn separate_path_whitespace_1() {
    let markdown = dedent(
        "
`foo.py` :
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let test = match tests.as_slice() {
        [only] => only,
        _ => panic!("expected one test"),
    };

    let files: Vec<_> = test.files().collect();
    let file = match files[..] {
        [only] => only,
        _ => panic!("expected one file"),
    };

    assert_eq!(file.relative_path(), "foo.py");
    assert_eq!(file.code, "x = 1");
}
/// Second whitespace variant of the path-label test: the single embedded file is
/// expected to have the relative path `foo.py` and the code `x = 1`.
#[test]
fn separate_path_whitespace_2() {
    let markdown = dedent(
        "
`foo.py`:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let [test] = tests.as_slice() else {
        panic!("expected one test");
    };

    let files: Vec<_> = test.files().collect();
    let [file] = files[..] else {
        panic!("expected one file");
    };

    assert_eq!(file.relative_path(), "foo.py");
    assert_eq!(file.code, "x = 1");
}
/// A path label containing a space is expected to be kept verbatim as the file's
/// relative path.
#[test]
fn path_with_space() {
    let markdown = dedent(
        "
`foo bar.py`:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let test = match tests.as_slice() {
        [only] => only,
        _ => panic!("expected one test"),
    };

    let files: Vec<_> = test.files().collect();
    let file = match files[..] {
        [only] => only,
        _ => panic!("expected one file"),
    };

    assert_eq!(file.relative_path(), "foo bar.py");
    assert_eq!(file.code, "x = 1");
}
/// A backtick-quoted label that spans a line break is expected not to be used as
/// a path: the single embedded file gets an autogenerated Python path.
#[test]
fn path_with_line_break() {
    let markdown = dedent(
        "
`foo
.py`:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let [test] = tests.as_slice() else {
        panic!("expected one test");
    };

    let files: Vec<_> = test.files().collect();
    let [file] = files[..] else {
        panic!("expected one file");
    };

    let expected_path = EmbeddedFilePath::Autogenerated(PySourceType::Python);
    assert_eq!(file.path, expected_path);
    assert_eq!(file.code, "x = 1");
}
/// A label with a backtick inside the quoted name is expected not to be used as a
/// path: the single embedded file gets an autogenerated Python path.
#[test]
fn path_with_backtick() {
    let markdown = dedent(
        "
`foo`bar.py`:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let test = match tests.as_slice() {
        [only] => only,
        _ => panic!("expected one test"),
    };

    let files: Vec<_> = test.files().collect();
    let file = match files[..] {
        [only] => only,
        _ => panic!("expected one file"),
    };

    let expected_path = EmbeddedFilePath::Autogenerated(PySourceType::Python);
    assert_eq!(file.path, expected_path);
    assert_eq!(file.code, "x = 1");
}
/// A colon placed on the line after the quoted name is expected not to form a
/// path label: the single embedded file gets an autogenerated Python path.
#[test]
fn path_colon_on_next_line() {
    let markdown = dedent(
        "
`foo.py`
:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let [test] = tests.as_slice() else {
        panic!("expected one test");
    };

    let files: Vec<_> = test.files().collect();
    let [file] = files[..] else {
        panic!("expected one file");
    };

    let expected_path = EmbeddedFilePath::Autogenerated(PySourceType::Python);
    assert_eq!(file.path, expected_path);
    assert_eq!(file.code, "x = 1");
}
/// A backtick-quoted word followed by a colon in the prose before a code block is
/// expected not to be used as a path: the file gets an autogenerated Python path.
#[test]
fn random_trailing_backtick_quoted() {
    let markdown = dedent(
        "
A long sentence that forces a line break
`int`:
```py
x = 1
```
",
    );
    let suite = super::parse("file.md", &markdown).unwrap();

    let tests: Vec<_> = suite.tests().collect();
    let test = match tests.as_slice() {
        [only] => only,
        _ => panic!("expected one test"),
    };

    let files: Vec<_> = test.files().collect();
    let file = match files[..] {
        [only] => only,
        _ => panic!("expected one file"),
    };

    let expected_path = EmbeddedFilePath::Autogenerated(PySourceType::Python);
    assert_eq!(file.path, expected_path);
    assert_eq!(file.code, "x = 1");
}
/// A code block that directly follows prose is expected to be rejected by the parser.
#[test]
fn no_newline_between_prose_and_code() {
    // Regression test for https://github.com/astral-sh/ruff/issues/15923
    let source = dedent(
        "
Some code:
No newline between prose and code:
```py
# A syntax error:
§
```
",
    );
    // The argument to `expect_err` is only the panic message used when parsing
    // unexpectedly succeeds; it does not check the error itself. Compare the rendered
    // diagnostic explicitly, as the other failure tests in this module do.
    let err = super::parse("file.md", &source).expect_err("Should fail to parse");
    assert_eq!(
        err.to_string(),
        "Code blocks must start on a new line and be preceded by at least one blank line."
    );
}
/// Metadata after the language on a fence line (`py foo=bar`) is expected to be
/// rejected.
#[test]
fn config_no_longer_allowed() {
    let markdown = dedent(
        "
```py foo=bar
x = 1
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Trailing code-block metadata is not supported. Only the code block language can be specified."
    );
}
/// Repeating the `snapshot-diagnostics` directive within one section is expected
/// to be rejected.
#[test]
fn duplicate_section_directive_not_allowed() {
    let markdown = dedent(
        "
# Some header
<!-- snapshot-diagnostics -->
<!-- snapshot-diagnostics -->
```py
x = 1
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Section config to enable snapshotting diagnostics should appear at most once.",
    );
}
/// Two differently spaced `snapshot-diagnostics` comments are expected to trigger
/// the "at most once" diagnostic.
#[test]
fn snapshot_diagnostic_directive_detection_ignores_whitespace() {
    // Slightly odd, but a rejection here shows that both HTML comments were
    // recognised as the directive that enables snapshotting for the file, even
    // though each contains a different amount of whitespace from the "standard"
    // form of the directive. That is the behaviour we want.
    let markdown = dedent(
        "
# Some header
<!-- snapshot-diagnostics -->
<!--snapshot-diagnostics-->
```py
x = 1
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Section config to enable snapshotting diagnostics should appear at most once.",
    );
}
/// A `snapshot-diagnostics` directive placed after a TOML configuration block is
/// expected to be rejected.
#[test]
fn section_directive_must_appear_before_config() {
    let markdown = dedent(
        "
# Some header
```toml
[environment]
typeshed = \"/typeshed\"
```
<!-- snapshot-diagnostics -->
```py
x = 1
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Section config to enable snapshotting diagnostics must \
        come before everything else \
        (including TOML configuration blocks).",
    );
}
/// A `snapshot-diagnostics` directive placed after an embedded file is expected
/// to be rejected.
#[test]
fn section_directive_must_appear_before_embedded_files() {
    let markdown = dedent(
        "
# Some header
```py
x = 1
```
<!-- snapshot-diagnostics -->
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Section config to enable snapshotting diagnostics must \
        come before everything else \
        (including embedded files).",
    );
}
/// An unrecognised HTML comment is expected to be rejected with a diagnostic
/// suggesting a `snapshot-diagnostics` typo.
#[test]
fn obvious_typos_in_directives_are_detected() {
    let markdown = dedent(
        "
# Some header
<!-- snpshotttt-digggggnosstic -->
```py
x = 1
```
",
    );
    let outcome = super::parse("file.md", &markdown);
    let error = outcome.expect_err("Should fail to parse");
    let message = error.to_string();
    assert_eq!(
        message,
        "Unknown HTML comment `snpshotttt-digggggnosstic` -- possibly a `snapshot-diagnostics` typo? \
        (Add to `HTML_COMMENT_ALLOWLIST` if this is a false positive)",
    );
}
/// The `blacken-docs:off` / `blacken-docs:on` HTML comments are expected to parse
/// without error.
#[test]
fn allowlisted_html_comments_are_permitted() {
    let markdown = dedent(
        "
# Some header
<!-- blacken-docs:off -->
```py
x = 1
```
<!-- blacken-docs:on -->
",
    );
    let parse_result = super::parse("file.md", &markdown);
    let succeeded = parse_result.is_ok();
    // Include the full result in the failure output so the parse error is visible.
    assert!(succeeded, "{parse_result:?}");
}
}