mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-02 18:02:23 +00:00
ruff_python_formatter: support reformatting Markdown code blocks (#9030)
(This is not possible to actually use until https://github.com/astral-sh/ruff/pull/8854 is merged.) This commit slots in support for formatting Markdown fenced code blocks[1]. With the refactoring done for reStructuredText previously, this ended up being pretty easy to add. Markdown code blocks are also quite a bit easier to parse and recognize correctly. One point of contention in #8860 is whether to assume that unlabeled Markdown code fences are Python or not by default. In this PR, we make such an assumption. This follows what `rustdoc` does. The mitigation here is that if an unlabeled code block isn't Python, then it probably won't parse as Python. And we'll end up skipping it. So in the vast majority of cases, the worst thing that can happen is a little bit of wasted work. Closes #8860 [1]: https://spec.commonmark.org/0.30/#fenced-code-blocks
This commit is contained in:
parent
b021ede481
commit
04ec11a73d
4 changed files with 5291 additions and 1 deletions
|
@ -795,6 +795,19 @@ def rst_literal_skipped_doctest():
|
|||
pass
|
||||
|
||||
|
||||
def rst_literal_skipped_markdown():
|
||||
"""
|
||||
Do cool stuff::
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def rst_directive_skipped_not_indented():
|
||||
"""
|
||||
.. code-block:: python
|
||||
|
@ -828,3 +841,496 @@ def rst_directive_skipped_doctest():
|
|||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Markdown CODE EXAMPLES
|
||||
#
|
||||
# This section shows examples of docstrings that contain code snippets in
|
||||
# Markdown fenced code blocks.
|
||||
#
|
||||
# See: https://spec.commonmark.org/0.30/#fenced-code-blocks
|
||||
###############################################################################
|
||||
|
||||
|
||||
def markdown_simple():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_simple_continued():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```python
|
||||
def cool_stuff( x ):
|
||||
print( f"hi {x}" );
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that unlabeled Markdown fenced code blocks are assumed to be Python.
|
||||
def markdown_unlabeled():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that fenced code blocks using tildes work.
|
||||
def markdown_tildes():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
~~~py
|
||||
cool_stuff( 1 )
|
||||
~~~
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that a longer closing fence is just fine and dandy.
|
||||
def markdown_longer_closing_fence():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
``````
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that an invalid closing fence is treated as invalid.
|
||||
#
|
||||
# We embed it into a docstring so that the surrounding Python
|
||||
# remains valid.
|
||||
def markdown_longer_closing_fence():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
'''
|
||||
```invalid
|
||||
'''
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that one can nest fenced code blocks by using different numbers of
|
||||
# backticks.
|
||||
def markdown_nested_fences():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
``````
|
||||
do_something( '''
|
||||
```
|
||||
did i trick you?
|
||||
```
|
||||
''' )
|
||||
``````
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that an unclosed block gobbles up everything remaining in the
|
||||
# docstring. When it's only empty lines, those are passed into the formatter
|
||||
# and thus stripped.
|
||||
def markdown_unclosed_empty_lines():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
|
||||
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that we can end the block on the second to last line of the
|
||||
# docstring.
|
||||
def markdown_second_to_last():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that an unclosed block with one extra line at the end is treated
|
||||
# correctly. As per the CommonMark spec, an unclosed fenced code block contains
|
||||
# everything following the opening fences. Since formatting the code snippet
|
||||
# trims lines, the last empty line is removed here.
|
||||
def markdown_second_to_last():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that we can end the block on the actual last line of the docstring.
|
||||
def markdown_actually_last():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that an unclosed block that ends on the last line of a docstring
|
||||
# is handled correctly.
|
||||
def markdown_unclosed_actually_last():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_with_blank_lines():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
def cool_stuff( x ):
|
||||
print( f"hi {x}" );
|
||||
|
||||
def other_stuff( y ):
|
||||
print( y )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_first_line_indent_uses_tabs_4spaces():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_first_line_indent_uses_tabs_4spaces_multiple():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_first_line_indent_uses_tabs_8spaces():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_first_line_indent_uses_tabs_8spaces_multiple():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_first_line_tab_second_line_spaces():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_odd_indentation():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Extra blanks should *not* be preserved (unlike reST) because they are part
|
||||
# of the code snippet (per CommonMark spec), and thus get trimmed as part of
|
||||
# code formatting.
|
||||
def markdown_extra_blanks():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
|
||||
|
||||
cool_stuff( 1 )
|
||||
|
||||
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# A block can contain many empty lines within it.
|
||||
def markdown_extra_blanks_in_snippet():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
|
||||
cool_stuff( 1 )
|
||||
|
||||
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_weird_closing():
|
||||
"""
|
||||
Code block with weirdly placed closing fences.
|
||||
|
||||
```python
|
||||
cool_stuff( 1 )
|
||||
|
||||
```
|
||||
# The above fences look like it shouldn't close the block, but we
|
||||
# allow it to. The fences below re-open a block (until the end of
|
||||
# the docstring), but it's invalid Python and thus doesn't get
|
||||
# reformatted.
|
||||
a = 10
|
||||
```
|
||||
|
||||
Now the code block is closed
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_over_indented():
|
||||
"""
|
||||
A docstring
|
||||
over intended
|
||||
```python
|
||||
print( 5 )
|
||||
```
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Tests that an unclosed block gobbles up everything remaining in the
|
||||
# docstring, even if it isn't valid Python. Since it isn't valid Python,
|
||||
# reformatting fails and the entire thing is skipped.
|
||||
def markdown_skipped_unclosed_non_python():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
|
||||
I forgot to close the code block, and this is definitely not
|
||||
Python. So nothing here gets formatted.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# This has a Python snippet with a docstring that contains a closing fence.
|
||||
# This splits the embedded docstring and makes the overall snippet invalid.
|
||||
def markdown_skipped_accidental_closure():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
'''
|
||||
```
|
||||
'''
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# When a line is unindented all the way out before the standard indent of the
|
||||
# docstring, the code reformatting ends up interacting poorly with the standard
|
||||
# docstring whitespace normalization logic. This is probably a bug, and we
|
||||
# should probably treat the Markdown block as valid, but for now, we detect
|
||||
# the unindented line and declare the block as invalid and thus do no code
|
||||
# reformatting.
|
||||
#
|
||||
# FIXME: Fixing this (if we think it's a bug) probably requires refactoring the
|
||||
# docstring whitespace normalization to be aware of code snippets. Or perhaps
|
||||
# plausibly, to do normalization *after* code snippets have been formatted.
|
||||
def markdown_skipped_unindented_completely():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# This test is fallout from treating fenced code blocks with unindented lines
|
||||
# as invalid. We probably should treat this as a valid block. Indeed, if we
|
||||
# remove the logic that makes the `markdown_skipped_unindented_completely` test
|
||||
# pass, then this code snippet will get reformatted correctly.
|
||||
def markdown_skipped_unindented_somewhat():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# This tests the case where a Markdown block contains a line that has less of
|
||||
# an indent than another line.
|
||||
#
|
||||
# There is some judgment involved in what the right behavior is here. We
|
||||
# could "normalize" the indentation so that the minimum is the indent of the
|
||||
# opening fence line. If we did that here, then the code snippet would become
|
||||
# valid and format as Python. But at time of writing, we don't, which leads to
|
||||
# inconsistent indentation and thus invalid Python.
|
||||
def markdown_skipped_unindented_with_inconsistent_indentation():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
cool_stuff( 1 )
|
||||
cool_stuff( 2 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_skipped_doctest():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
>>> cool_stuff( 1 )
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_skipped_rst_literal():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
And do this::
|
||||
|
||||
cool_stuff( 1 )
|
||||
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def markdown_skipped_rst_directive():
|
||||
"""
|
||||
Do cool stuff.
|
||||
|
||||
```py
|
||||
.. code-block:: python
|
||||
|
||||
cool_stuff( 1 )
|
||||
|
||||
```
|
||||
|
||||
Done.
|
||||
"""
|
||||
pass
|
||||
|
|
|
@ -354,6 +354,16 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
|
|||
)?;
|
||||
}
|
||||
}
|
||||
CodeExampleKind::Markdown(fenced) => {
|
||||
// This looks suspicious, but it's consistent with the whitespace
|
||||
// normalization that will occur anyway.
|
||||
let indent = " ".repeat(fenced.opening_fence_indent.to_usize());
|
||||
for docline in formatted_lines {
|
||||
self.print_one(
|
||||
&docline.map(|line| std::format!("{indent}{line}")),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -648,6 +658,18 @@ impl<'src> CodeExample<'src> {
|
|||
};
|
||||
self.kind = Some(CodeExampleKind::Rst(litblock));
|
||||
}
|
||||
Some(CodeExampleKind::Markdown(fenced)) => {
|
||||
let Some(fenced) = fenced.add_code_line(original, queue) else {
|
||||
// For Markdown, the last line in a block should be printed
|
||||
// as-is. Especially since the last line in many Markdown
|
||||
// fenced code blocks is identical to the start of a code
|
||||
// block. So if we try to start a new code block with
|
||||
// the last line, we risk opening another Markdown block
|
||||
// inappropriately.
|
||||
return;
|
||||
};
|
||||
self.kind = Some(CodeExampleKind::Markdown(fenced));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -681,6 +703,9 @@ impl<'src> CodeExample<'src> {
|
|||
} else if let Some(litblock) = CodeExampleRst::new(original) {
|
||||
self.kind = Some(CodeExampleKind::Rst(litblock));
|
||||
queue.push_back(CodeExampleAddAction::Print { original });
|
||||
} else if let Some(fenced) = CodeExampleMarkdown::new(original) {
|
||||
self.kind = Some(CodeExampleKind::Markdown(fenced));
|
||||
queue.push_back(CodeExampleAddAction::Print { original });
|
||||
} else {
|
||||
queue.push_back(CodeExampleAddAction::Print { original });
|
||||
}
|
||||
|
@ -707,6 +732,10 @@ enum CodeExampleKind<'src> {
|
|||
/// [literal block]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#literal-blocks
|
||||
/// [code block directive]: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-code-block
|
||||
Rst(CodeExampleRst<'src>),
|
||||
/// Code found from a Markdown "[fenced code block]".
|
||||
///
|
||||
/// [fenced code block]: https://spec.commonmark.org/0.30/#fenced-code-blocks
|
||||
Markdown(CodeExampleMarkdown<'src>),
|
||||
}
|
||||
|
||||
impl<'src> CodeExampleKind<'src> {
|
||||
|
@ -718,6 +747,7 @@ impl<'src> CodeExampleKind<'src> {
|
|||
match *self {
|
||||
CodeExampleKind::Doctest(ref doctest) => &doctest.lines,
|
||||
CodeExampleKind::Rst(ref mut litblock) => litblock.indented_code(),
|
||||
CodeExampleKind::Markdown(ref fenced) => &fenced.lines,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -731,6 +761,7 @@ impl<'src> CodeExampleKind<'src> {
|
|||
match self {
|
||||
CodeExampleKind::Doctest(doctest) => doctest.lines,
|
||||
CodeExampleKind::Rst(litblock) => litblock.lines,
|
||||
CodeExampleKind::Markdown(fenced) => fenced.lines,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1156,6 +1187,227 @@ impl<'src> CodeExampleRst<'src> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Represents a code example extracted from a Markdown [fenced code block].
|
||||
///
|
||||
/// [fenced code block]: https://spec.commonmark.org/0.30/#fenced-code-blocks
|
||||
#[derive(Debug)]
|
||||
struct CodeExampleMarkdown<'src> {
|
||||
/// The lines that have been seen so far that make up the block.
|
||||
lines: Vec<CodeExampleLine<'src>>,
|
||||
|
||||
/// The indent of the "opening" fence line of this block, measured via
|
||||
/// `indentation_length`.
|
||||
///
|
||||
/// This indentation is trimmed from the indentation of every line in the
|
||||
/// body of the code block.
|
||||
opening_fence_indent: TextSize,
|
||||
|
||||
/// The kind of fence, backticks or tildes, used for this block. We need to
|
||||
/// keep track of which kind was used to open the block in order to look
|
||||
/// for a correct close of the block.
|
||||
fence_kind: MarkdownFenceKind,
|
||||
|
||||
/// The size of the fence, in codepoints, in the opening line. A correct
|
||||
/// close of the fence must use *at least* this many characters. In other
|
||||
/// words, this is the number of backticks or tildes that opened the fenced
|
||||
/// code block.
|
||||
fence_len: usize,
|
||||
}
|
||||
|
||||
impl<'src> CodeExampleMarkdown<'src> {
|
||||
/// Looks for the start of a Markdown [fenced code block].
|
||||
///
|
||||
/// If the start of a block is found, then this returns a correctly
|
||||
/// initialized Markdown code block. Callers should print the line as given
|
||||
/// as it is not retained as part of the block.
|
||||
///
/// Returns `None` when the line, after its indentation, does not match the
/// opening-fence pattern below: three or more backticks or tildes,
/// optionally followed by a Python language label and further info-string
/// text.
///
|
||||
/// [fenced code block]: https://spec.commonmark.org/0.30/#fenced-code-blocks
|
||||
fn new(original: InputDocstringLine<'src>) -> Option<CodeExampleMarkdown<'src>> {
|
||||
static FENCE_START: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(
|
||||
r"(?xm)
|
||||
^
|
||||
(?:
|
||||
# In the backtick case, info strings (following the fence)
|
||||
# cannot contain backticks themselves, since it would
|
||||
# introduce ambiguity with parsing inline code. In other
|
||||
# words, if we didn't specifically exclude matching `
|
||||
# in the info string for backtick fences, then we might
|
||||
# erroneously consider something to be a code fence block
|
||||
# that is actually inline code.
|
||||
#
|
||||
# NOTE: The `ticklang` and `tildlang` capture groups are
|
||||
# currently unused, but there was some discussion about not
|
||||
# assuming unlabeled blocks were Python. At the time of
|
||||
# writing, we do assume unlabeled blocks are Python, but
|
||||
# one could inspect the `ticklang` and `tildlang` capture
|
||||
# groups to determine whether the block is labeled or not.
|
||||
(?<ticks>```+)(?:\s*(?<ticklang>(?i:python|py|python3|py3))[^`]*)?
|
||||
|
|
||||
(?<tilds>~~~+)(?:\s*(?<tildlang>(?i:python|py|python3|py3))\p{any}*)?
|
||||
)
|
||||
$
|
||||
",
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
// Split the line into its leading indentation and everything after it.
// Only the latter is matched against the fence pattern.
let (opening_fence_indent, rest) = indent_with_suffix(original.line);
|
||||
// Quit quickly in the vast majority of cases.
|
||||
if !rest.starts_with("```") && !rest.starts_with("~~~") {
|
||||
return None;
|
||||
}
|
||||
|
||||
// `?` bails out with `None` when the line starts with a fence but does
// not match the pattern (e.g., a fence labeled with a language other
// than Python).
let caps = FENCE_START.captures(rest)?;
|
||||
// The two alternatives in the pattern are mutually exclusive, so whichever
// of the `ticks`/`tilds` groups matched determines the fence kind. The
// length of that group is the size of the opening fence.
let (fence_kind, fence_len) = if let Some(ticks) = caps.name("ticks") {
|
||||
(MarkdownFenceKind::Backtick, ticks.as_str().chars().count())
|
||||
} else {
|
||||
let tildes = caps
|
||||
.name("tilds")
|
||||
.expect("no ticks means it must be tildes");
|
||||
(MarkdownFenceKind::Tilde, tildes.as_str().chars().count())
|
||||
};
|
||||
// The block starts out empty: the opening fence line itself is not stored.
Some(CodeExampleMarkdown {
|
||||
lines: vec![],
|
||||
opening_fence_indent: indentation_length(opening_fence_indent),
|
||||
fence_kind,
|
||||
fence_len,
|
||||
})
|
||||
}
|
||||
|
||||
/// Attempts to add the given line from a docstring to the Markdown code
|
||||
/// snippet being collected.
|
||||
///
|
||||
/// Ownership of the block is returned (as `Some`) unless the end of the
|
||||
/// block was found or the block was determined to be invalid. When the
|
||||
/// closing fence is found, a format action is pushed onto the queue; when
/// the block is invalid, a reset action is pushed instead. In both of those
/// cases, the given line is then queued to be printed as-is. Otherwise, the
/// line is kept as part of the block.
|
||||
fn add_code_line(
|
||||
mut self,
|
||||
original: InputDocstringLine<'src>,
|
||||
queue: &mut VecDeque<CodeExampleAddAction<'src>>,
|
||||
) -> Option<CodeExampleMarkdown<'src>> {
|
||||
// A closing fence finishes the block: format what was collected and print
// the fence line itself unchanged.
if self.is_end(original) {
|
||||
queue.push_back(self.into_format_action());
|
||||
queue.push_back(CodeExampleAddAction::Print { original });
|
||||
return None;
|
||||
}
|
||||
// When a line in a Markdown fenced code block is indented *less*
|
||||
// than the opening indent, we treat the entire block as invalid.
|
||||
//
|
||||
// I believe that code blocks of this form are actually valid Markdown
|
||||
// in some cases, but the interplay between it and our docstring
|
||||
// whitespace normalization leads to undesirable outcomes. For example,
|
||||
// if the line here is unindented out beyond the initial indent of the
|
||||
// docstring itself, then this causes the entire docstring to have
|
||||
// its indent normalized. And, at the time of writing, a subsequent
|
||||
// formatting run undoes this indentation, thus violating idempotency.
|
||||
//
// Lines that are empty after trimming whitespace are exempt from this
// check.
if !original.line.trim_whitespace().is_empty()
|
||||
&& indentation_length(original.line) < self.opening_fence_indent
|
||||
{
|
||||
queue.push_back(self.into_reset_action());
|
||||
queue.push_back(CodeExampleAddAction::Print { original });
|
||||
return None;
|
||||
}
|
||||
// An ordinary line inside the block: retain it and keep collecting.
self.push(original);
|
||||
queue.push_back(CodeExampleAddAction::Kept);
|
||||
Some(self)
|
||||
}
|
||||
|
||||
/// Returns true when the given line ends this fenced code block.
///
/// A line is a closing fence when, after its indentation, it consists of at
/// least `fence_len` repetitions of the fence character that opened this
/// block, followed by nothing other than whitespace.
|
||||
fn is_end(&self, original: InputDocstringLine<'src>) -> bool {
|
||||
let (_, rest) = indent_with_suffix(original.line);
|
||||
// We can bail early if we don't have at least three backticks or
|
||||
// tildes.
|
||||
if !rest.starts_with("```") && !rest.starts_with("~~~") {
|
||||
return false;
|
||||
}
|
||||
// We do need to check that we have the right number of
|
||||
// backticks/tildes...
|
||||
//
// Only characters matching the fence kind that opened this block are
// counted. A fence made of the other character therefore counts as zero
// and is rejected by the length check below.
let fence_len = rest
|
||||
.chars()
|
||||
.take_while(|&ch| ch == self.fence_kind.to_char())
|
||||
.count();
|
||||
// A closing fence only needs *at least* the number of ticks/tildes
|
||||
// that are in the opening fence.
|
||||
if fence_len < self.fence_len {
|
||||
return false;
|
||||
}
|
||||
// And, also, there can only be trailing whitespace. Nothing else.
|
||||
//
// `fence_len` is a count of characters, but it is used below as a byte
// offset into `rest`. That is only correct when the fence character
// occupies a single byte, hence the ASCII assertion.
assert!(
|
||||
self.fence_kind.to_char().is_ascii(),
|
||||
"fence char should be ASCII",
|
||||
);
|
||||
if !rest[fence_len..].chars().all(is_python_whitespace) {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Pushes the given line as part of this code example.
///
/// The line is recorded both in its original form and as code, where the
/// code is the line with the opening fence's indentation trimmed from it.
|
||||
fn push(&mut self, original: InputDocstringLine<'src>) {
|
||||
// Unlike reStructuredText blocks, for Markdown fenced code blocks, the
|
||||
// indentation that we want to strip from each line is known when the
|
||||
// block is opened. So we can strip it as we collect lines.
|
||||
let code = indentation_trim(self.opening_fence_indent, original.line);
|
||||
self.lines.push(CodeExampleLine { original, code });
|
||||
}
|
||||
|
||||
/// Consume this block and turn it into a format action.
|
||||
///
|
||||
/// This is used when collection of the block is finished (for example, when
|
||||
/// `add_code_line` finds the closing fence), at which point the lines
|
||||
/// collected so far make up the code example to reformat.
|
||||
fn into_format_action(self) -> CodeExampleAddAction<'src> {
|
||||
// Note that unlike in reStructuredText blocks, if a Markdown fenced
|
||||
// code block is unclosed, then *all* remaining lines should be treated
|
||||
// as part of the block[1]:
|
||||
//
|
||||
// > If the end of the containing block (or document) is reached and no
|
||||
// > closing code fence has been found, the code block contains all of the
|
||||
// > lines after the opening code fence until the end of the containing
|
||||
// > block (or document).
|
||||
//
|
||||
// This means that we don't need to try and trim trailing empty lines.
|
||||
// Those will get fed into the code formatter and ultimately stripped,
|
||||
// which is what you'd expect if those lines are treated as part of the
|
||||
// block.
|
||||
//
|
||||
// [1]: https://spec.commonmark.org/0.30/#fenced-code-blocks
|
||||
CodeExampleAddAction::Format {
|
||||
kind: CodeExampleKind::Markdown(self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume this block and turn it into a reset action.
|
||||
///
|
||||
/// This occurs when we started collecting a code example from something
|
||||
/// that looked like a fenced code block, but later determined that it
|
||||
/// wasn't valid. The lines collected so far are handed over to the action.
|
||||
fn into_reset_action(self) -> CodeExampleAddAction<'src> {
|
||||
CodeExampleAddAction::Reset { code: self.lines }
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of fence used in a Markdown code block.
|
||||
///
|
||||
/// This indicates whether the code block is delimited by fences made from
|
||||
/// backticks or by fences made from tildes.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
enum MarkdownFenceKind {
|
||||
/// A fence built from backtick characters.
Backtick,
|
||||
/// A fence built from tilde characters.
Tilde,
|
||||
}
|
||||
|
||||
impl MarkdownFenceKind {
|
||||
/// Convert the fence kind to the actual character used to build the fence.
///
/// Both possible characters are ASCII, which `CodeExampleMarkdown::is_end`
/// asserts before slicing a line by fence length.
|
||||
fn to_char(self) -> char {
|
||||
match self {
|
||||
MarkdownFenceKind::Backtick => '`',
|
||||
MarkdownFenceKind::Tilde => '~',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single line in a code example found in a docstring.
|
||||
///
|
||||
/// A code example line exists prior to formatting, and is thus in full
|
||||
|
@ -1306,7 +1558,7 @@ fn indentation_trim(indent_len: TextSize, line: &str) -> &str {
|
|||
break;
|
||||
}
|
||||
}
|
||||
line
|
||||
trimmed
|
||||
}
|
||||
|
||||
/// Returns the indentation of the given line and everything following it.
|
||||
|
|
|
@ -82,6 +82,10 @@ impl Transformer for Normalizer {
|
|||
// everything after it. Talk about a hammer.
|
||||
Regex::new(r#"::(?s:.*)"#).unwrap()
|
||||
});
|
||||
static STRIP_MARKDOWN_BLOCKS: Lazy<Regex> = Lazy::new(|| {
|
||||
// This covers more than valid Markdown blocks, but that's OK.
|
||||
Regex::new(r#"(```|~~~)\p{any}*(```|~~~|$)"#).unwrap()
|
||||
});
|
||||
|
||||
// Start by (1) stripping everything that looks like a code
|
||||
// snippet, since code snippets may be completely reformatted if
|
||||
|
@ -98,6 +102,12 @@ impl Transformer for Normalizer {
|
|||
"<RSTBLOCK-CODE-SNIPPET: Removed by normalizer>\n",
|
||||
)
|
||||
.into_owned();
|
||||
string_literal.value = STRIP_MARKDOWN_BLOCKS
|
||||
.replace_all(
|
||||
&string_literal.value,
|
||||
"<MARKDOWN-CODE-SNIPPET: Removed by normalizer>\n",
|
||||
)
|
||||
.into_owned();
|
||||
// Normalize a string by (2) stripping any leading and trailing space from each
|
||||
// line, and (3) removing any blank lines from the start and end of the string.
|
||||
string_literal.value = string_literal
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue