Docstring formatting: Preserve tab indentation when using indent-style=tabs (#9915)

This commit is contained in:
Micha Reiser 2024-02-12 16:09:13 +01:00 committed by GitHub
parent 4946a1876f
commit 8657a392ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 678 additions and 99 deletions

View file

@ -4,4 +4,8 @@ ij_formatter_enabled = false
["range_formatting/*.py"] ["range_formatting/*.py"]
generated_code = true generated_code = true
ij_formatter_enabled = false
[docstring_tab_indentation.py]
generated_code = true
ij_formatter_enabled = false ij_formatter_enabled = false

View file

@ -0,0 +1,10 @@
[
{
"indent_style": "tab",
"indent_width": 4
},
{
"indent_style": "tab",
"indent_width": 8
}
]

View file

@ -0,0 +1,72 @@
# Tests the behavior of the formatter when it comes to tabs inside docstrings
# when using `indent_style="tab"`
# The example below uses tabs exclusively. The formatter should preserve the tab indentation
# of `arg1`.
def tab_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with 2 tabs in front
"""
# The `arg1` is indented with spaces. The formatter should not change the spaces to a tab
# because it must assume that the spaces are used for alignment and not indentation.
def space_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
def under_indented(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def under_indented_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def spaces_tabs_over_indent(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The docstring itself is indented with spaces but the argument is indented by a tab.
# Keep the tab indentation of the argument, convert the docstring indent to tabs.
def space_indented_docstring_containing_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg
"""
# The docstring uses tabs, spaces, tabs indentation.
# Fallback to use space indentation
def mixed_indentation(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The example shows an ascii art. The formatter should not change the spaces
# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)`
# when using an indent width other than 8.
def ascii_art():
r"""
Look at this beautiful tree.
a
/ \
b c
/ \
d e
"""

View file

@ -2,11 +2,13 @@
// "reStructuredText." // "reStructuredText."
#![allow(clippy::doc_markdown)] #![allow(clippy::doc_markdown)]
use std::cmp::Ordering;
use std::{borrow::Cow, collections::VecDeque}; use std::{borrow::Cow, collections::VecDeque};
use itertools::Itertools;
use ruff_formatter::printer::SourceMapGeneration; use ruff_formatter::printer::SourceMapGeneration;
use ruff_python_parser::ParseError; use ruff_python_parser::ParseError;
use {once_cell::sync::Lazy, regex::Regex}; use {once_cell::sync::Lazy, regex::Regex};
use { use {
ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed}, ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed},
@ -80,9 +82,7 @@ use super::{NormalizedString, QuoteChar};
/// ``` /// ```
/// ///
/// Tabs are counted by padding them to the next multiple of 8 according to /// Tabs are counted by padding them to the next multiple of 8 according to
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs). When /// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs).
/// we see indentation that contains a tab or any other none ascii-space whitespace we rewrite the
/// string.
/// ///
/// Additionally, if any line in the docstring has less indentation than the docstring /// Additionally, if any line in the docstring has less indentation than the docstring
/// (effectively a negative indentation wrt. to the current level), we pad all lines to the /// (effectively a negative indentation wrt. to the current level), we pad all lines to the
@ -104,6 +104,10 @@ use super::{NormalizedString, QuoteChar};
/// line c /// line c
/// """ /// """
/// ``` /// ```
/// The indentation is rewritten to all-spaces when using [`IndentStyle::Space`].
/// The formatter preserves tab-indentations when using [`IndentStyle::Tab`], but doesn't convert
/// `indent-width * spaces` to tabs because doing so could break ASCII art and other docstrings
/// that use spaces for alignment.
pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> { pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
let docstring = &normalized.text; let docstring = &normalized.text;
@ -176,19 +180,19 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
// align it with the docstring statement. Conversely, if all lines are over-indented, we strip // align it with the docstring statement. Conversely, if all lines are over-indented, we strip
// the extra indentation. We call this stripped indentation since it's relative to the block // the extra indentation. We call this stripped indentation since it's relative to the block
// indent printer-made indentation. // indent printer-made indentation.
let stripped_indentation_length = lines let stripped_indentation = lines
.clone() .clone()
// We don't want to count whitespace-only lines as miss-indented // We don't want to count whitespace-only lines as miss-indented
.filter(|line| !line.trim().is_empty()) .filter(|line| !line.trim().is_empty())
.map(indentation_length) .map(Indentation::from_str)
.min() .min_by_key(|indentation| indentation.width())
.unwrap_or_default(); .unwrap_or_default();
DocstringLinePrinter { DocstringLinePrinter {
f, f,
action_queue: VecDeque::new(), action_queue: VecDeque::new(),
offset, offset,
stripped_indentation_length, stripped_indentation,
already_normalized, already_normalized,
quote_char: normalized.quotes.quote_char, quote_char: normalized.quotes.quote_char,
code_example: CodeExample::default(), code_example: CodeExample::default(),
@ -240,9 +244,9 @@ struct DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
/// printed. /// printed.
offset: TextSize, offset: TextSize,
/// Indentation alignment (in columns) based on the least indented line in the /// Indentation alignment based on the least indented line in the
/// docstring. /// docstring.
stripped_indentation_length: usize, stripped_indentation: Indentation,
/// Whether the docstring is overall already considered normalized. When it /// Whether the docstring is overall already considered normalized. When it
/// is, the formatter can take a fast path. /// is, the formatter can take a fast path.
@ -345,7 +349,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
}; };
// This looks suspicious, but it's consistent with the whitespace // This looks suspicious, but it's consistent with the whitespace
// normalization that will occur anyway. // normalization that will occur anyway.
let indent = " ".repeat(min_indent); let indent = " ".repeat(min_indent.width());
for docline in formatted_lines { for docline in formatted_lines {
self.print_one( self.print_one(
&docline.map(|line| std::format!("{indent}{line}")), &docline.map(|line| std::format!("{indent}{line}")),
@ -355,7 +359,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
CodeExampleKind::Markdown(fenced) => { CodeExampleKind::Markdown(fenced) => {
// This looks suspicious, but it's consistent with the whitespace // This looks suspicious, but it's consistent with the whitespace
// normalization that will occur anyway. // normalization that will occur anyway.
let indent = " ".repeat(fenced.opening_fence_indent); let indent = " ".repeat(fenced.opening_fence_indent.width());
for docline in formatted_lines { for docline in formatted_lines {
self.print_one( self.print_one(
&docline.map(|line| std::format!("{indent}{line}")), &docline.map(|line| std::format!("{indent}{line}")),
@ -387,12 +391,58 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
}; };
} }
let tab_or_non_ascii_space = trim_end let indent_offset = match self.f.options().indent_style() {
.chars() // Normalize all indent to spaces.
.take_while(|c| c.is_whitespace()) IndentStyle::Space => {
.any(|c| c != ' '); let tab_or_non_ascii_space = trim_end
.chars()
.take_while(|c| c.is_whitespace())
.any(|c| c != ' ');
if tab_or_non_ascii_space { if tab_or_non_ascii_space {
None
} else {
// It's guaranteed that the `indent` is all spaces because `tab_or_non_ascii_space` is
// `false` (indent contains neither tabs nor non-space whitespace).
let stripped_indentation_len = self.stripped_indentation.text_len();
// Take the string with the trailing whitespace removed, then also
// skip the leading whitespace.
Some(stripped_indentation_len)
}
}
IndentStyle::Tab => {
let line_indent = Indentation::from_str(trim_end);
let non_ascii_whitespace = trim_end
.chars()
.take_while(|c| c.is_whitespace())
.any(|c| !matches!(c, ' ' | '\t'));
let trimmed = line_indent.trim_start(self.stripped_indentation);
// Preserve tabs that are used for indentation, but only if the indent isn't
// * a mix of tabs and spaces
// * the `stripped_indentation` is a prefix of the line's indent
// * the trimmed indent isn't spaces followed by tabs because that would result in a
// mixed tab, spaces, tab indentation, resulting in instabilities.
let preserve_indent = !non_ascii_whitespace
&& trimmed.is_some_and(|trimmed| !trimmed.is_spaces_tabs());
preserve_indent.then_some(self.stripped_indentation.text_len())
}
};
if let Some(indent_offset) = indent_offset {
// Take the string with the trailing whitespace removed, then also
// skip the leading whitespace.
if self.already_normalized {
let trimmed_line_range =
TextRange::at(line.offset, trim_end.text_len()).add_start(indent_offset);
source_text_slice(trimmed_line_range).fmt(self.f)?;
} else {
text(&trim_end[indent_offset.to_usize()..]).fmt(self.f)?;
}
} else {
// We strip the indentation that is shared with the docstring // We strip the indentation that is shared with the docstring
// statement, unless a line was indented less than the docstring // statement, unless a line was indented less than the docstring
// statement, in which case we strip only this much indentation to // statement, in which case we strip only this much indentation to
@ -400,24 +450,11 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
// overindented, in which case we strip the additional whitespace // overindented, in which case we strip the additional whitespace
// (see example in [`format_docstring`] doc comment). We then // (see example in [`format_docstring`] doc comment). We then
// prepend the in-docstring indentation to the string. // prepend the in-docstring indentation to the string.
let indent_len = indentation_length(trim_end) - self.stripped_indentation_length; let indent_len =
Indentation::from_str(trim_end).width() - self.stripped_indentation.width();
let in_docstring_indent = " ".repeat(indent_len) + trim_end.trim_start(); let in_docstring_indent = " ".repeat(indent_len) + trim_end.trim_start();
text(&in_docstring_indent).fmt(self.f)?; text(&in_docstring_indent).fmt(self.f)?;
} else { };
// It's guaranteed that the `indent` is all spaces because `tab_or_non_ascii_space` is
// `false` (indent contains neither tabs nor non-space whitespace).
// Take the string with the trailing whitespace removed, then also
// skip the leading whitespace.
let trimmed_line_range = TextRange::at(line.offset, trim_end.text_len())
.add_start(TextSize::try_from(self.stripped_indentation_length).unwrap());
if self.already_normalized {
source_text_slice(trimmed_line_range).fmt(self.f)?;
} else {
// All indents are ascii spaces, so the slicing is correct.
text(&trim_end[self.stripped_indentation_length..]).fmt(self.f)?;
}
}
// We handled the case that the closing quotes are on their own line // We handled the case that the closing quotes are on their own line
// above (the last line is empty except for whitespace). If they are on // above (the last line is empty except for whitespace). If they are on
@ -898,8 +935,7 @@ struct CodeExampleRst<'src> {
/// The lines that have been seen so far that make up the block. /// The lines that have been seen so far that make up the block.
lines: Vec<CodeExampleLine<'src>>, lines: Vec<CodeExampleLine<'src>>,
/// The indent of the line "opening" this block measured via /// The indent of the line "opening" this block in columns.
/// `indentation_length` (in columns).
/// ///
/// It can either be the indent of a line ending with `::` (for a literal /// It can either be the indent of a line ending with `::` (for a literal
/// block) or the indent of a line starting with `.. ` (a directive). /// block) or the indent of a line starting with `.. ` (a directive).
@ -907,9 +943,9 @@ struct CodeExampleRst<'src> {
/// The content body of a block needs to be indented more than the line /// The content body of a block needs to be indented more than the line
/// opening the block, so we use this indentation to look for indentation /// opening the block, so we use this indentation to look for indentation
/// that is "more than" it. /// that is "more than" it.
opening_indent: usize, opening_indent: Indentation,
/// The minimum indent of the block measured via `indentation_length`. /// The minimum indent of the block in columns.
/// ///
/// This is `None` until the first such line is seen. If no such line is /// This is `None` until the first such line is seen. If no such line is
/// found, then we consider it an invalid block and bail out of trying to /// found, then we consider it an invalid block and bail out of trying to
@ -926,7 +962,7 @@ struct CodeExampleRst<'src> {
/// When the code snippet has been extracted, it is re-built before being /// When the code snippet has been extracted, it is re-built before being
/// reformatted. The minimum indent is stripped from each line when it is /// reformatted. The minimum indent is stripped from each line when it is
/// re-built. /// re-built.
min_indent: Option<usize>, min_indent: Option<Indentation>,
/// Whether this is a directive block or not. When not a directive, this is /// Whether this is a directive block or not. When not a directive, this is
/// a literal block. The main difference between them is that they start /// a literal block. The main difference between them is that they start
@ -975,7 +1011,7 @@ impl<'src> CodeExampleRst<'src> {
} }
Some(CodeExampleRst { Some(CodeExampleRst {
lines: vec![], lines: vec![],
opening_indent: indentation_length(opening_indent), opening_indent: Indentation::from_str(opening_indent),
min_indent: None, min_indent: None,
is_directive: false, is_directive: false,
}) })
@ -1013,7 +1049,7 @@ impl<'src> CodeExampleRst<'src> {
} }
Some(CodeExampleRst { Some(CodeExampleRst {
lines: vec![], lines: vec![],
opening_indent: indentation_length(original.line), opening_indent: Indentation::from_str(original.line),
min_indent: None, min_indent: None,
is_directive: true, is_directive: true,
}) })
@ -1033,7 +1069,7 @@ impl<'src> CodeExampleRst<'src> {
line.code = if line.original.line.trim().is_empty() { line.code = if line.original.line.trim().is_empty() {
"" ""
} else { } else {
indentation_trim(min_indent, line.original.line) min_indent.trim_start_str(line.original.line)
}; };
} }
&self.lines &self.lines
@ -1070,7 +1106,9 @@ impl<'src> CodeExampleRst<'src> {
// an empty line followed by an unindented non-empty line. // an empty line followed by an unindented non-empty line.
if let Some(next) = original.next { if let Some(next) = original.next {
let (next_indent, next_rest) = indent_with_suffix(next); let (next_indent, next_rest) = indent_with_suffix(next);
if !next_rest.is_empty() && indentation_length(next_indent) <= self.opening_indent { if !next_rest.is_empty()
&& Indentation::from_str(next_indent) <= self.opening_indent
{
self.push_format_action(queue); self.push_format_action(queue);
return None; return None;
} }
@ -1082,7 +1120,7 @@ impl<'src> CodeExampleRst<'src> {
queue.push_back(CodeExampleAddAction::Kept); queue.push_back(CodeExampleAddAction::Kept);
return Some(self); return Some(self);
} }
let indent_len = indentation_length(indent); let indent_len = Indentation::from_str(indent);
if indent_len <= self.opening_indent { if indent_len <= self.opening_indent {
// If we find an unindented non-empty line at the same (or less) // If we find an unindented non-empty line at the same (or less)
// indentation of the opening line at this point, then we know it // indentation of the opening line at this point, then we know it
@ -1144,7 +1182,7 @@ impl<'src> CodeExampleRst<'src> {
queue.push_back(CodeExampleAddAction::Print { original }); queue.push_back(CodeExampleAddAction::Print { original });
return Some(self); return Some(self);
} }
let min_indent = indentation_length(indent); let min_indent = Indentation::from_str(indent);
// At this point, we found a non-empty line. The only thing we require // At this point, we found a non-empty line. The only thing we require
// is that its indentation is strictly greater than the indentation of // is that its indentation is strictly greater than the indentation of
// the line containing the `::`. Otherwise, we treat this as an invalid // the line containing the `::`. Otherwise, we treat this as an invalid
@ -1218,12 +1256,11 @@ struct CodeExampleMarkdown<'src> {
/// The lines that have been seen so far that make up the block. /// The lines that have been seen so far that make up the block.
lines: Vec<CodeExampleLine<'src>>, lines: Vec<CodeExampleLine<'src>>,
/// The indent of the line "opening" fence of this block measured via /// The indent of the line "opening" fence of this block in columns.
/// `indentation_length` (in columns).
/// ///
/// This indentation is trimmed from the indentation of every line in the /// This indentation is trimmed from the indentation of every line in the
/// body of the code block, /// body of the code block,
opening_fence_indent: usize, opening_fence_indent: Indentation,
/// The kind of fence, backticks or tildes, used for this block. We need to /// The kind of fence, backticks or tildes, used for this block. We need to
/// keep track of which kind was used to open the block in order to look /// keep track of which kind was used to open the block in order to look
@ -1292,7 +1329,7 @@ impl<'src> CodeExampleMarkdown<'src> {
}; };
Some(CodeExampleMarkdown { Some(CodeExampleMarkdown {
lines: vec![], lines: vec![],
opening_fence_indent: indentation_length(opening_fence_indent), opening_fence_indent: Indentation::from_str(opening_fence_indent),
fence_kind, fence_kind,
fence_len, fence_len,
}) })
@ -1325,7 +1362,7 @@ impl<'src> CodeExampleMarkdown<'src> {
// its indent normalized. And, at the time of writing, a subsequent // its indent normalized. And, at the time of writing, a subsequent
// formatting run undoes this indentation, thus violating idempotency. // formatting run undoes this indentation, thus violating idempotency.
if !original.line.trim_whitespace().is_empty() if !original.line.trim_whitespace().is_empty()
&& indentation_length(original.line) < self.opening_fence_indent && Indentation::from_str(original.line) < self.opening_fence_indent
{ {
queue.push_back(self.into_reset_action()); queue.push_back(self.into_reset_action());
queue.push_back(CodeExampleAddAction::Print { original }); queue.push_back(CodeExampleAddAction::Print { original });
@ -1371,7 +1408,7 @@ impl<'src> CodeExampleMarkdown<'src> {
// Unlike reStructuredText blocks, for Markdown fenced code blocks, the // Unlike reStructuredText blocks, for Markdown fenced code blocks, the
// indentation that we want to strip from each line is known when the // indentation that we want to strip from each line is known when the
// block is opened. So we can strip it as we collect lines. // block is opened. So we can strip it as we collect lines.
let code = indentation_trim(self.opening_fence_indent, original.line); let code = self.opening_fence_indent.trim_start_str(original.line);
self.lines.push(CodeExampleLine { original, code }); self.lines.push(CodeExampleLine { original, code });
} }
@ -1486,7 +1523,6 @@ enum CodeExampleAddAction<'src> {
/// results in that code example becoming invalid. In this case, /// results in that code example becoming invalid. In this case,
/// we don't want to treat it as a code example, but instead write /// we don't want to treat it as a code example, but instead write
/// back the lines to the docstring unchanged. /// back the lines to the docstring unchanged.
#[allow(dead_code)] // FIXME: remove when reStructuredText support is added
Reset { Reset {
/// The lines of code that we collected but should be printed back to /// The lines of code that we collected but should be printed back to
/// the docstring as-is and not formatted. /// the docstring as-is and not formatted.
@ -1537,53 +1573,241 @@ fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1 || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
} }
/// Returns the indentation's visual width in columns/spaces. #[derive(Copy, Clone, Debug)]
/// enum Indentation {
/// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up /// Space only indentation or an empty indentation.
/// to the next multiple of 8. This is effectively a port of ///
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs), /// The value is the number of spaces.
/// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61). Spaces(usize),
fn indentation_length(line: &str) -> usize {
let mut indentation = 0usize; /// Tabs only indentation.
for char in line.chars() { Tabs(usize),
if char == '\t' {
// Pad to the next multiple of tab_width /// Indentation that uses tabs followed by spaces.
indentation += 8 - (indentation.rem_euclid(8)); /// Also known as smart tabs where tabs are used for indents, and spaces for alignment.
} else if char.is_whitespace() { TabSpaces { tabs: usize, spaces: usize },
indentation += char.len_utf8();
} else { /// Indentation that uses spaces followed by tabs.
break; SpacesTabs { spaces: usize, tabs: usize },
}
} /// Mixed indentation of tabs and spaces.
indentation Mixed {
/// The visual width of the indentation in columns.
width: usize,
/// The length of the indentation in bytes
len: TextSize,
},
} }
/// Trims at most `indent_len` indentation from the beginning of `line`. impl Indentation {
/// const TAB_INDENT_WIDTH: usize = 8;
/// This treats indentation in precisely the same way as `indentation_length`.
/// As such, it is expected that `indent_len` is computed from fn from_str(s: &str) -> Self {
/// `indentation_length`. This is useful when one needs to trim some minimum let mut iter = s.chars().peekable();
/// level of indentation from a code snippet collected from a docstring before
/// attempting to reformat it. let spaces = iter.peeking_take_while(|c| *c == ' ').count();
fn indentation_trim(indent_len: usize, line: &str) -> &str { let tabs = iter.peeking_take_while(|c| *c == '\t').count();
let mut seen_indent_len = 0;
let mut trimmed = line; if tabs == 0 {
for char in line.chars() { // No indent, or spaces only indent
if seen_indent_len >= indent_len { return Self::Spaces(spaces);
return trimmed;
} }
if char == '\t' {
// Pad to the next multiple of tab_width let align_spaces = iter.peeking_take_while(|c| *c == ' ').count();
seen_indent_len += 8 - (seen_indent_len.rem_euclid(8));
trimmed = &trimmed[1..]; if spaces == 0 {
} else if char.is_whitespace() { if align_spaces == 0 {
seen_indent_len += char.len_utf8(); return Self::Tabs(tabs);
trimmed = &trimmed[char.len_utf8()..]; }
} else {
break; // At this point it's either a smart tab (tabs followed by spaces) or a wild mix of tabs and spaces.
if iter.peek().copied() != Some('\t') {
return Self::TabSpaces {
tabs,
spaces: align_spaces,
};
}
} else if align_spaces == 0 {
return Self::SpacesTabs { spaces, tabs };
}
// Sequence of spaces.. tabs, spaces, tabs...
let mut width = spaces + tabs * Self::TAB_INDENT_WIDTH + align_spaces;
// SAFETY: Safe because Ruff doesn't support files larger than 4GB.
let mut len = TextSize::try_from(spaces + tabs + align_spaces).unwrap();
for char in iter {
if char == '\t' {
// Pad to the next multiple of tab_width
width += Self::TAB_INDENT_WIDTH - (width.rem_euclid(Self::TAB_INDENT_WIDTH));
len += '\t'.text_len();
} else if char.is_whitespace() {
width += char.len_utf8();
len += char.text_len();
} else {
break;
}
}
// Mixed tabs and spaces
Self::Mixed { width, len }
}
/// Returns the indentation's visual width in columns/spaces.
///
/// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up
/// to the next multiple of 8. This is effectively a port of
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs),
/// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61).
const fn width(self) -> usize {
match self {
Self::Spaces(count) => count,
Self::Tabs(count) => count * Self::TAB_INDENT_WIDTH,
Self::TabSpaces { tabs, spaces } => tabs * Self::TAB_INDENT_WIDTH + spaces,
Self::SpacesTabs { spaces, tabs } => {
let mut indent = spaces;
indent += Self::TAB_INDENT_WIDTH - indent.rem_euclid(Self::TAB_INDENT_WIDTH);
indent + (tabs - 1) * Self::TAB_INDENT_WIDTH
}
Self::Mixed { width, .. } => width,
} }
} }
trimmed
/// Returns how many bytes the indentation occupies in the source text.
///
/// Spaces and tabs are one byte each, so the structured variants simply add up
/// their character counts. [`Indentation::Mixed`] carries a precomputed length
/// that is returned unchanged.
///
/// # Panics
/// If the indentation is longer than 4GB.
fn text_len(self) -> TextSize {
    let byte_count = match self {
        // Already stored as a `TextSize`; no conversion necessary.
        Self::Mixed { len, .. } => return len,
        Self::Spaces(count) | Self::Tabs(count) => count,
        Self::TabSpaces { tabs, spaces } | Self::SpacesTabs { spaces, tabs } => tabs + spaces,
    };

    TextSize::try_from(byte_count).unwrap()
}
/// Trims the indentation `rhs` from the start of `self`.
///
/// Returns `None` when `rhs` cannot be removed from `self`:
/// * either side is [`Indentation::Mixed`],
/// * `rhs` has more tabs or more spaces than `self` in the corresponding position,
/// * `self` is spaces-followed-by-tabs and `rhs` is neither space-only nor
///   spaces-followed-by-tabs with the *same* number of leading spaces,
/// * `rhs` is spaces-followed-by-tabs but `self` is not.
///
/// For the tab-first shapes (`Spaces`, `Tabs`, `TabSpaces`) the tab and space counts of
/// `rhs` are subtracted component-wise from those of `self`.
fn trim_start(self, rhs: Self) -> Option<Self> {
    let (left_tabs, left_spaces) = match self {
        Self::Spaces(spaces) => (0usize, spaces),
        Self::Tabs(tabs) => (tabs, 0usize),
        Self::TabSpaces { tabs, spaces } => (tabs, spaces),

        // Handled separately because it is the only shape where the spaces come before the tabs.
        Self::SpacesTabs {
            spaces: left_spaces,
            tabs: left_tabs,
        } => {
            return match rhs {
                // Stripping leading spaces only: the tabs of `self` always remain.
                Self::Spaces(right_spaces) => {
                    left_spaces.checked_sub(right_spaces).map(|spaces| {
                        if spaces == 0 {
                            Self::Tabs(left_tabs)
                        } else {
                            Self::SpacesTabs {
                                tabs: left_tabs,
                                spaces,
                            }
                        }
                    })
                }

                Self::SpacesTabs {
                    spaces: right_spaces,
                    tabs: right_tabs,
                } => {
                    if left_spaces == right_spaces {
                        // All leading spaces are consumed; only surplus tabs can remain.
                        left_tabs.checked_sub(right_tabs).map(|tabs| {
                            if tabs == 0 {
                                Self::Spaces(0)
                            } else {
                                Self::Tabs(tabs)
                            }
                        })
                    } else {
                        // `rhs` contains a tab where `self` still has a space (or vice
                        // versa), so it is not a prefix. Returning `None` also avoids
                        // building a `SpacesTabs` with zero tabs, which `width()` cannot
                        // represent because it computes `tabs - 1`.
                        None
                    }
                }

                Self::Tabs(_) | Self::TabSpaces { .. } | Self::Mixed { .. } => None,
            };
        }

        Self::Mixed { .. } => return None,
    };

    let (right_tabs, right_spaces) = match rhs {
        Self::Spaces(spaces) => (0usize, spaces),
        Self::Tabs(tabs) => (tabs, 0usize),
        Self::TabSpaces { tabs, spaces } => (tabs, spaces),
        Self::SpacesTabs { .. } | Self::Mixed { .. } => return None,
    };

    // `checked_sub` yields `None` as soon as `rhs` is "longer" than `self` in either component.
    let tabs = left_tabs.checked_sub(right_tabs)?;
    let spaces = left_spaces.checked_sub(right_spaces)?;

    Some(if tabs == 0 {
        Self::Spaces(spaces)
    } else if spaces == 0 {
        Self::Tabs(tabs)
    } else {
        Self::TabSpaces { tabs, spaces }
    })
}
/// Strips leading whitespace from `line` until at least `self.width()` columns
/// have been consumed, or until the first non-whitespace character.
///
/// Columns are counted the same way as in [`Indentation::width`]: a tab pads to the
/// next multiple of `TAB_INDENT_WIDTH`, every other whitespace character counts
/// its UTF-8 byte length.
///
/// This is useful when one needs to trim some minimum level of indentation from
/// a code snippet collected from a docstring before attempting to reformat it.
fn trim_start_str(self, line: &str) -> &str {
    let target_width = self.width();
    let mut consumed_width = 0;

    for (byte_offset, ch) in line.char_indices() {
        // Enough indentation removed: everything from here on is kept.
        if consumed_width >= target_width {
            return &line[byte_offset..];
        }

        if ch == '\t' {
            // Pad to the next multiple of tab_width
            consumed_width +=
                Self::TAB_INDENT_WIDTH - consumed_width.rem_euclid(Self::TAB_INDENT_WIDTH);
        } else if ch.is_whitespace() {
            consumed_width += ch.len_utf8();
        } else {
            // First non-whitespace character ends the indentation.
            return &line[byte_offset..];
        }
    }

    // The whole line was whitespace and got consumed.
    &line[line.len()..]
}
/// Returns `true` if this indentation is the [`Indentation::SpacesTabs`] variant,
/// i.e. spaces followed by tabs.
const fn is_spaces_tabs(self) -> bool {
matches!(self, Self::SpacesTabs { .. })
}
}
/// Indentations are ordered by their visual width in columns, regardless of
/// whether that width is made up of tabs, spaces, or a mix of both.
impl PartialOrd for Indentation {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let (lhs_width, rhs_width) = (self.width(), other.width());
        lhs_width.partial_cmp(&rhs_width)
    }
}
/// Two indentations are equal when they have the same visual width in columns,
/// even if they are built from different characters.
impl PartialEq for Indentation {
    fn eq(&self, other: &Self) -> bool {
        let (lhs_width, rhs_width) = (self.width(), other.width());
        lhs_width == rhs_width
    }
}
impl Default for Indentation {
fn default() -> Self {
Self::Spaces(0)
}
} }
/// Returns the indentation of the given line and everything following it. /// Returns the indentation of the given line and everything following it.
@ -1613,14 +1837,13 @@ fn is_rst_option(line: &str) -> bool {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::string::docstring::Indentation;
use super::indentation_length;
#[test] #[test]
fn test_indentation_like_black() { fn test_indentation_like_black() {
assert_eq!(indentation_length("\t \t \t"), 24); assert_eq!(Indentation::from_str("\t \t \t").width(), 24);
assert_eq!(indentation_length("\t \t"), 24); assert_eq!(Indentation::from_str("\t \t").width(), 24);
assert_eq!(indentation_length("\t\t\t"), 24); assert_eq!(Indentation::from_str("\t\t\t").width(), 24);
assert_eq!(indentation_length(" "), 4); assert_eq!(Indentation::from_str(" ").width(), 4);
} }
} }

View file

@ -0,0 +1,270 @@
---
source: crates/ruff_python_formatter/tests/fixtures.rs
input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_tab_indentation.py
---
## Input
```python
# Tests the behavior of the formatter when it comes to tabs inside docstrings
# when using `indent_style="tab`
# The example below uses tabs exclusively. The formatter should preserve the tab indentation
# of `arg1`.
def tab_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with 2 tabs in front
"""
# The `arg1` is intended with spaces. The formatter should not change the spaces to a tab
# because it must assume that the spaces are used for alignment and not indentation.
def space_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
def under_indented(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def under_indented_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def spaces_tabs_over_indent(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The docstring itself is indented with spaces but the argument is indented by a tab.
# Keep the tab indentation of the argument, convert th docstring indent to tabs.
def space_indented_docstring_containing_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg
"""
# The docstring uses tabs, spaces, tabs indentation.
# Fallback to use space indentation
def mixed_indentation(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The example shows an ascii art. The formatter should not change the spaces
# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)`
# when using an indent width other than 8.
def ascii_art():
r"""
Look at this beautiful tree.
a
/ \
b c
/ \
d e
"""
```
## Outputs
### Output 1
```
indent-style = tab
line-width = 88
indent-width = 4
quote-style = Double
line-ending = LineFeed
magic-trailing-comma = Respect
docstring-code = Disabled
docstring-code-line-width = "dynamic"
preview = Disabled
target_version = Py38
source_type = Python
```
```python
# Tests the behavior of the formatter when it comes to tabs inside docstrings
# when using `indent_style="tab`
# The example below uses tabs exclusively. The formatter should preserve the tab indentation
# of `arg1`.
def tab_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with 2 tabs in front
"""
# The `arg1` is indented with spaces. The formatter should not change the spaces to a tab
# because it must assume that the spaces are used for alignment and not indentation.
def space_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
def under_indented(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def under_indented_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def spaces_tabs_over_indent(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The docstring itself is indented with spaces but the argument is indented by a tab.
# Keep the tab indentation of the argument, convert the docstring indent to tabs.
def space_indented_docstring_containing_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg
"""
# The docstring uses tabs, spaces, tabs indentation.
# Fall back to using space indentation
def mixed_indentation(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The example shows ASCII art. The formatter should not change the spaces
# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)`
# when using an indent width other than 8.
def ascii_art():
r"""
Look at this beautiful tree.
a
/ \
b c
/ \
d e
"""
```
### Output 2
```
indent-style = tab
line-width = 88
indent-width = 8
quote-style = Double
line-ending = LineFeed
magic-trailing-comma = Respect
docstring-code = Disabled
docstring-code-line-width = "dynamic"
preview = Disabled
target_version = Py38
source_type = Python
```
```python
# Tests the behavior of the formatter when it comes to tabs inside docstrings
# when using `indent_style="tab"`
# The example below uses tabs exclusively. The formatter should preserve the tab indentation
# of `arg1`.
def tab_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with 2 tabs in front
"""
# The `arg1` is indented with spaces. The formatter should not change the spaces to a tab
# because it must assume that the spaces are used for alignment and not indentation.
def space_argument(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
def under_indented(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def under_indented_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
arg2: Not properly indented
"""
def spaces_tabs_over_indent(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The docstring itself is indented with spaces but the argument is indented by a tab.
# Keep the tab indentation of the argument, convert the docstring indent to tabs.
def space_indented_docstring_containing_tabs(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg
"""
# The docstring uses tabs, spaces, tabs indentation.
# Fall back to using space indentation
def mixed_indentation(arg1: str) -> None:
"""
Arguments:
arg1: super duper arg with a tab and a space in front
"""
# The example shows ASCII art. The formatter should not change the spaces
# to tabs because it breaks the ASCII art when inspecting the docstring with `inspect.cleandoc(ascii_art.__doc__)`
# when using an indent width other than 8.
def ascii_art():
r"""
Look at this beautiful tree.
a
/ \
b c
/ \
d e
"""
```