ruff/crates/ruff_python_formatter/src/expression/string.rs
konsti 01eceaf0dc
Format docstrings (#6452)
**Summary** Implement docstring formatting

**Test Plan** Matches black's `docstring.py` fixture exactly, added some
new cases for what is hard to debug with black and with what black
doesn't cover.

similarity index:

main:
zulip: 0.99702
django: 0.99784
warehouse: 0.99585
build: 0.75623
transformers: 0.99469
cpython: 0.75989
typeshed: 0.74853

this branch:

zulip: 0.99702
django: 0.99784
warehouse: 0.99585
build: 0.75623
transformers: 0.99464
cpython: 0.75517
typeshed: 0.74853

The regression in transformers is actually an improvement in a file they
don't format with black (they run `black examples tests src utils
setup.py conftest.py`, the difference is in hubconf.py). cpython doesn't
use black.

Closes #6196
2023-08-14 12:28:58 +00:00

974 lines
34 KiB
Rust

use std::borrow::Cow;
use bitflags::bitflags;
use ruff_formatter::{format_args, write, FormatError};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::str::is_implicit_concatenation;
use ruff_python_ast::{self as ast, ExprConstant, ExprFString, Ranged};
use ruff_python_parser::lexer::{lex_starts_at, LexicalError, LexicalErrorType};
use ruff_python_parser::{Mode, Tok};
use ruff_source_file::Locator;
use ruff_text_size::{TextLen, TextRange, TextSize};
use crate::comments::{leading_comments, trailing_comments};
use crate::expression::parentheses::{
in_parentheses_only_group, in_parentheses_only_soft_line_break_or_space,
};
use crate::expression::Expr;
use crate::prelude::*;
use crate::QuoteStyle;
#[derive(Copy, Clone)]
enum Quoting {
CanChange,
Preserve,
}
pub(super) enum AnyString<'a> {
Constant(&'a ExprConstant),
FString(&'a ExprFString),
}
impl<'a> AnyString<'a> {
fn quoting(&self, locator: &Locator) -> Quoting {
match self {
Self::Constant(_) => Quoting::CanChange,
Self::FString(f_string) => {
if f_string.values.iter().any(|value| match value {
Expr::FormattedValue(ast::ExprFormattedValue { range, .. }) => {
let string_content = locator.slice(*range);
string_content.contains(['"', '\''])
}
_ => false,
}) {
Quoting::Preserve
} else {
Quoting::CanChange
}
}
}
}
}
impl Ranged for AnyString<'_> {
fn range(&self) -> TextRange {
match self {
Self::Constant(expr) => expr.range(),
Self::FString(expr) => expr.range(),
}
}
}
impl<'a> From<&AnyString<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyString<'a>) -> Self {
match value {
AnyString::Constant(expr) => AnyNodeRef::ExprConstant(expr),
AnyString::FString(expr) => AnyNodeRef::ExprFString(expr),
}
}
}
pub(super) struct FormatString<'a> {
string: &'a AnyString<'a>,
layout: StringLayout,
}
#[derive(Default, Copy, Clone, Debug)]
pub enum StringLayout {
#[default]
Default,
DocString,
ImplicitConcatenatedBinaryLeftSide,
}
impl<'a> FormatString<'a> {
pub(super) fn new(string: &'a AnyString) -> Self {
if let AnyString::Constant(constant) = string {
debug_assert!(constant.value.is_str() || constant.value.is_bytes());
}
Self {
string,
layout: StringLayout::Default,
}
}
pub(super) fn with_layout(mut self, layout: StringLayout) -> Self {
self.layout = layout;
self
}
}
impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
match self.layout {
StringLayout::Default => {
let string_range = self.string.range();
let string_content = f.context().locator().slice(string_range);
if is_implicit_concatenation(string_content) {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else {
FormatStringPart::new(
string_range,
self.string.quoting(&f.context().locator()),
&f.context().locator(),
f.options().quote_style(),
)
.fmt(f)
}
}
StringLayout::DocString => {
let string_part = FormatStringPart::new(
self.string.range(),
// f-strings can't be docstrings
Quoting::CanChange,
&f.context().locator(),
f.options().quote_style(),
);
format_docstring(&string_part, f)
}
StringLayout::ImplicitConcatenatedBinaryLeftSide => {
FormatStringContinuation::new(self.string).fmt(f)
}
}
}
}
struct FormatStringContinuation<'a> {
string: &'a AnyString<'a>,
}
impl<'a> FormatStringContinuation<'a> {
fn new(string: &'a AnyString<'a>) -> Self {
if let AnyString::Constant(constant) = string {
debug_assert!(constant.value.is_str() || constant.value.is_bytes());
}
Self { string }
}
}
impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let comments = f.context().comments().clone();
let locator = f.context().locator();
let quote_style = f.options().quote_style();
let mut dangling_comments = comments.dangling_comments(self.string);
let string_range = self.string.range();
let string_content = locator.slice(string_range);
// The AST parses implicit concatenation as a single string.
// Call into the lexer to extract the individual chunks and format each string on its own.
// This code does not yet implement the automatic joining of strings that fit on the same line
// because this is a black preview style.
let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start());
let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
for token in lexer {
let (token, token_range) = match token {
Ok(spanned) => spanned,
Err(LexicalError {
error: LexicalErrorType::IndentationError,
..
}) => {
// This can happen if the string continuation appears anywhere inside of a parenthesized expression
// because the lexer doesn't know about the parentheses. For example, the following snipped triggers an Indentation error
// ```python
// {
// "key": (
// [],
// 'a'
// 'b'
// 'c'
// )
// }
// ```
// Ignoring the error here is *safe* because we know that the program once parsed to a valid AST.
continue;
}
Err(_) => {
return Err(FormatError::syntax_error(
"Unexpected lexer error in string formatting",
));
}
};
match token {
Tok::String { .. } => {
// ```python
// (
// "a"
// # leading
// "the comment above"
// )
// ```
let leading_comments_end = dangling_comments
.partition_point(|comment| comment.slice().start() <= token_range.start());
let (leading_part_comments, rest) =
dangling_comments.split_at(leading_comments_end);
// ```python
// (
// "a" # trailing comment
// "the comment above"
// )
// ```
let trailing_comments_end = rest.partition_point(|comment| {
comment.line_position().is_end_of_line()
&& !locator.contains_line_break(TextRange::new(
token_range.end(),
comment.slice().start(),
))
});
let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
joiner.entry(&format_args![
line_suffix_boundary(),
leading_comments(leading_part_comments),
FormatStringPart::new(
token_range,
self.string.quoting(&locator),
&locator,
quote_style,
),
trailing_comments(trailing_part_comments)
]);
dangling_comments = rest;
}
Tok::Comment(_)
| Tok::NonLogicalNewline
| Tok::Newline
| Tok::Indent
| Tok::Dedent => continue,
token => unreachable!("Unexpected token {token:?}"),
}
}
debug_assert!(dangling_comments.is_empty());
joiner.finish()
}
}
struct FormatStringPart {
prefix: StringPrefix,
preferred_quotes: StringQuotes,
range: TextRange,
is_raw_string: bool,
}
impl FormatStringPart {
fn new(range: TextRange, quoting: Quoting, locator: &Locator, quote_style: QuoteStyle) -> Self {
let string_content = locator.slice(range);
let prefix = StringPrefix::parse(string_content);
let after_prefix = &string_content[usize::from(prefix.text_len())..];
let quotes =
StringQuotes::parse(after_prefix).expect("Didn't find string quotes after prefix");
let relative_raw_content_range = TextRange::new(
prefix.text_len() + quotes.text_len(),
string_content.text_len() - quotes.text_len(),
);
let raw_content_range = relative_raw_content_range + range.start();
let raw_content = &string_content[relative_raw_content_range];
let is_raw_string = prefix.is_raw_string();
let preferred_quotes = match quoting {
Quoting::Preserve => quotes,
Quoting::CanChange => {
if is_raw_string {
preferred_quotes_raw(raw_content, quotes, quote_style)
} else {
preferred_quotes(raw_content, quotes, quote_style)
}
}
};
Self {
prefix,
range: raw_content_range,
preferred_quotes,
is_raw_string,
}
}
}
impl Format<PyFormatContext<'_>> for FormatStringPart {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let (normalized, contains_newlines) = normalize_string(
f.context().locator().slice(self.range),
self.preferred_quotes,
self.is_raw_string,
);
write!(f, [self.prefix, self.preferred_quotes])?;
match normalized {
Cow::Borrowed(_) => {
source_text_slice(self.range, contains_newlines).fmt(f)?;
}
Cow::Owned(normalized) => {
dynamic_text(&normalized, Some(self.range.start())).fmt(f)?;
}
}
self.preferred_quotes.fmt(f)
}
}
bitflags! {
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(super) struct StringPrefix: u8 {
const UNICODE = 0b0000_0001;
/// `r"test"`
const RAW = 0b0000_0010;
/// `R"test"
const RAW_UPPER = 0b0000_0100;
const BYTE = 0b0000_1000;
const F_STRING = 0b0001_0000;
}
}
impl StringPrefix {
pub(super) fn parse(input: &str) -> StringPrefix {
let chars = input.chars();
let mut prefix = StringPrefix::empty();
for c in chars {
let flag = match c {
'u' | 'U' => StringPrefix::UNICODE,
'f' | 'F' => StringPrefix::F_STRING,
'b' | 'B' => StringPrefix::BYTE,
'r' => StringPrefix::RAW,
'R' => StringPrefix::RAW_UPPER,
'\'' | '"' => break,
c => {
unreachable!(
"Unexpected character '{c}' terminating the prefix of a string literal"
);
}
};
prefix |= flag;
}
prefix
}
pub(super) const fn text_len(self) -> TextSize {
TextSize::new(self.bits().count_ones())
}
pub(super) const fn is_raw_string(self) -> bool {
matches!(self, StringPrefix::RAW | StringPrefix::RAW_UPPER)
}
}
impl Format<PyFormatContext<'_>> for StringPrefix {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
// Retain the casing for the raw prefix:
// https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
if self.contains(StringPrefix::RAW) {
text("r").fmt(f)?;
} else if self.contains(StringPrefix::RAW_UPPER) {
text("R").fmt(f)?;
}
if self.contains(StringPrefix::BYTE) {
text("b").fmt(f)?;
}
if self.contains(StringPrefix::F_STRING) {
text("f").fmt(f)?;
}
// Remove the unicode prefix `u` if any because it is meaningless in Python 3+.
Ok(())
}
}
/// Detects the preferred quotes for raw string `input`.
/// The configured quote style is preferred unless `input` contains unescaped quotes of the
/// configured style. For example, `r"foo"` is preferred over `r'foo'` if the configured
/// quote style is double quotes.
fn preferred_quotes_raw(
input: &str,
quotes: StringQuotes,
configured_style: QuoteStyle,
) -> StringQuotes {
let configured_quote_char = configured_style.as_char();
let mut chars = input.chars().peekable();
let contains_unescaped_configured_quotes = loop {
match chars.next() {
Some('\\') => {
// Ignore escaped characters
chars.next();
}
// `"` or `'`
Some(c) if c == configured_quote_char => {
if !quotes.triple {
break true;
}
match chars.peek() {
// We can't turn `r'''\""'''` into `r"""\"""""`, this would confuse the parser
// about where the closing triple quotes start
None => break true,
Some(next) if *next == configured_quote_char => {
// `""` or `''`
chars.next();
// We can't turn `r'''""'''` into `r""""""""`, nor can we have
// `"""` or `'''` respectively inside the string
if chars.peek().is_none() || chars.peek() == Some(&configured_quote_char) {
break true;
}
}
_ => {}
}
}
Some(_) => continue,
None => break false,
}
};
StringQuotes {
triple: quotes.triple,
style: if contains_unescaped_configured_quotes {
quotes.style
} else {
configured_style
},
}
}
/// Detects the preferred quotes for `input`.
/// * single quoted strings: The preferred quote style is the one that requires less escape sequences.
/// * triple quoted strings: Use double quotes except the string contains a sequence of `"""`.
fn preferred_quotes(
input: &str,
quotes: StringQuotes,
configured_style: QuoteStyle,
) -> StringQuotes {
let preferred_style = if quotes.triple {
// True if the string contains a triple quote sequence of the configured quote style.
let mut uses_triple_quotes = false;
let mut chars = input.chars().peekable();
while let Some(c) = chars.next() {
let configured_quote_char = configured_style.as_char();
match c {
'\\' => {
if matches!(chars.peek(), Some('"' | '\\')) {
chars.next();
}
}
// `"` or `'`
c if c == configured_quote_char => {
match chars.peek().copied() {
Some(c) if c == configured_quote_char => {
// `""` or `''`
chars.next();
if chars.peek().copied() == Some(configured_quote_char) {
// `"""` or `'''`
chars.next();
uses_triple_quotes = true;
}
}
Some(_) => {
// A single quote char, this is ok
}
None => {
// Trailing quote at the end of the comment
uses_triple_quotes = true;
}
}
}
_ => continue,
}
}
if uses_triple_quotes {
// String contains a triple quote sequence of the configured quote style.
// Keep the existing quote style.
quotes.style
} else {
configured_style
}
} else {
let mut single_quotes = 0u32;
let mut double_quotes = 0u32;
for c in input.chars() {
match c {
'\'' => {
single_quotes += 1;
}
'"' => {
double_quotes += 1;
}
_ => continue,
}
}
match configured_style {
QuoteStyle::Single => {
if single_quotes > double_quotes {
QuoteStyle::Double
} else {
QuoteStyle::Single
}
}
QuoteStyle::Double => {
if double_quotes > single_quotes {
QuoteStyle::Single
} else {
QuoteStyle::Double
}
}
}
};
StringQuotes {
triple: quotes.triple,
style: preferred_style,
}
}
#[derive(Copy, Clone, Debug)]
pub(super) struct StringQuotes {
triple: bool,
style: QuoteStyle,
}
impl StringQuotes {
pub(super) fn parse(input: &str) -> Option<StringQuotes> {
let mut chars = input.chars();
let quote_char = chars.next()?;
let style = QuoteStyle::try_from(quote_char).ok()?;
let triple = chars.next() == Some(quote_char) && chars.next() == Some(quote_char);
Some(Self { triple, style })
}
pub(super) const fn is_triple(self) -> bool {
self.triple
}
const fn text_len(self) -> TextSize {
if self.triple {
TextSize::new(3)
} else {
TextSize::new(1)
}
}
}
impl Format<PyFormatContext<'_>> for StringQuotes {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let quotes = match (self.style, self.triple) {
(QuoteStyle::Single, false) => "'",
(QuoteStyle::Single, true) => "'''",
(QuoteStyle::Double, false) => "\"",
(QuoteStyle::Double, true) => "\"\"\"",
};
text(quotes).fmt(f)
}
}
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
/// with the provided `style`.
///
/// Returns the normalized string and whether it contains new lines.
fn normalize_string(
input: &str,
quotes: StringQuotes,
is_raw: bool,
) -> (Cow<str>, ContainsNewlines) {
// The normalized string if `input` is not yet normalized.
// `output` must remain empty if `input` is already normalized.
let mut output = String::new();
// Tracks the last index of `input` that has been written to `output`.
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
let mut last_index = 0;
let mut newlines = ContainsNewlines::No;
let style = quotes.style;
let preferred_quote = style.as_char();
let opposite_quote = style.invert().as_char();
let mut chars = input.char_indices();
while let Some((index, c)) = chars.next() {
if c == '\r' {
output.push_str(&input[last_index..index]);
// Skip over the '\r' character, keep the `\n`
if input.as_bytes().get(index + 1).copied() == Some(b'\n') {
chars.next();
}
// Replace the `\r` with a `\n`
else {
output.push('\n');
}
last_index = index + '\r'.len_utf8();
newlines = ContainsNewlines::Yes;
} else if c == '\n' {
newlines = ContainsNewlines::Yes;
} else if !quotes.triple && !is_raw {
if c == '\\' {
if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) {
#[allow(clippy::if_same_then_else)]
if next == opposite_quote {
// Remove the escape by ending before the backslash and starting again with the quote
chars.next();
output.push_str(&input[last_index..index]);
last_index = index + '\\'.len_utf8();
} else if next == preferred_quote {
// Quote is already escaped, skip over it.
chars.next();
} else if next == '\\' {
// Skip over escaped backslashes
chars.next();
}
}
} else if c == preferred_quote {
// Escape the quote
output.push_str(&input[last_index..index]);
output.push('\\');
output.push(c);
last_index = index + preferred_quote.len_utf8();
}
}
}
let normalized = if last_index == 0 {
Cow::Borrowed(input)
} else {
output.push_str(&input[last_index..]);
Cow::Owned(output)
};
(normalized, newlines)
}
/// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up
/// to the next multiple of 8. This is effectively a port of
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs),
/// which black [calls with the default tab width of 8](https://github.com/psf/black/blob/c36e468794f9256d5e922c399240d49782ba04f1/src/black/strings.py#L61)
fn count_indentation_like_black(line: &str) -> TextSize {
let tab_width: u32 = 8;
let mut indentation = TextSize::default();
for char in line.chars() {
if char == '\t' {
// Pad to the next multiple of tab_width
indentation += TextSize::from(tab_width - (indentation.to_u32().rem_euclid(tab_width)));
} else if char.is_whitespace() {
indentation += char.text_len();
} else {
return indentation;
}
}
indentation
}
/// Format a docstring by trimming whitespace and adjusting the indentation.
///
/// Summary of changes we make:
/// * Normalize the string like all other strings
/// * Ignore docstring that have an escaped newline
/// * Trim all trailing whitespace, except for a chaperone space that avoids quotes or backslashes
/// in the last line.
/// * Trim leading whitespace on the first line, again except for a chaperone space
/// * If there is only content in the first line and after that only whitespace, collapse the
/// docstring into one line
/// * Adjust the indentation (see below)
///
/// # Docstring indentation
///
/// Unlike any other string, like black we change the indentation of docstring lines.
///
/// We want to preserve the indentation inside the docstring relative to the suite statement/block
/// indent that the docstring statement is in, but also want to apply the change of the outer
/// indentation in the docstring, e.g.
/// ```python
/// def sparkle_sky():
/// """Make a pretty sparkly sky.
/// * * ✨ *. .
/// * * ✨ .
/// . * . ✨ * . .
/// """
/// ```
/// should become
/// ```python
/// def sparkle_sky():
/// """Make a pretty sparkly sky.
/// * * ✨ *. .
/// * * ✨ .
/// . * . ✨ * . .
/// """
/// ```
/// We can't compute the full indentation here since we don't know what the block indent of
/// the doc comment will be yet and which we can only have added by formatting each line
/// separately with a hard line break. This means we need to strip shared indentation from
/// docstring while preserving the in-docstring bigger-than-suite-statement indentation. Example:
/// ```python
/// def f():
/// """first line
/// line a
/// line b
/// """
/// ```
/// The docstring indentation is 2, the block indents will change this to 4 (but we can't
/// determine this at this point). The indentation of line a is 2, so we trim ` line a`
/// to `line a`. For line b it's 5, so we trim it to `line b` and pad with 5-2=3 spaces to
/// ` line b`. The closing quotes, being on their own line, are stripped get only the
/// default indentation. Fully formatted:
/// ```python
/// def f():
/// """first line
/// line a
/// line b
/// """
/// ```
///
/// Tabs are counted by padding them to the next multiple of 8 according to
/// [`str.expandtabs`](https://docs.python.org/3/library/stdtypes.html#str.expandtabs). When
/// we see indentation that contains a tab or any other none ascii-space whitespace we rewrite the
/// string.
///
/// Additionally, if any line in the docstring has less indentation than the docstring
/// (effectively a negative indentation wrt. to the current level), we pad all lines to the
/// level of the docstring with spaces.
/// ```python
/// def f():
/// """first line
/// line a
/// line b
/// line c
/// """
/// ```
/// Here line a is 3 columns negatively indented, so we pad all lines by an extra 3 spaces:
/// ```python
/// def f():
/// """first line
/// line a
/// line b
/// line c
/// """
/// ```
fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
// Black doesn't change the indentation of docstrings that contain an escaped newline
if locator.slice(string_part.range).contains("\\\n") {
return string_part.fmt(f);
}
let (normalized, _) = normalize_string(
locator.slice(string_part.range),
string_part.preferred_quotes,
string_part.is_raw_string,
);
// is_borrowed is unstable :/
let already_normalized = matches!(normalized, Cow::Borrowed(_));
let mut lines = normalized.lines().peekable();
// Start the string
write!(
f,
[
source_position(string_part.range.start()),
string_part.prefix,
string_part.preferred_quotes
]
)?;
// We track where in the source docstring we are (in source code byte offsets)
let mut offset = string_part.range.start();
// The first line directly after the opening quotes has different rules than the rest, mainly
// that we remove all leading whitespace as there's no indentation
let first = lines.next().unwrap_or_default();
// Black trims whitespace using [`str.strip()`](https://docs.python.org/3/library/stdtypes.html#str.strip)
// https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/strings.py#L77-L85
// So we use the unicode whitespace definition through `trim_{start,end}` instead of the python
// tokenizer whitespace definition in `trim_whitespace_{start,end}`.
let trim_end = first.trim_end();
let trim_both = trim_end.trim_start();
// Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep
// inner quotes and closing quotes from getting to close to avoid `""""content`
if trim_both.starts_with(string_part.preferred_quotes.style.as_char()) {
space().fmt(f)?;
}
if !trim_end.is_empty() {
// For the first line of the docstring we strip the leading and trailing whitespace, e.g.
// `""" content ` to `"""content`
let leading_whitespace = trim_end.text_len() - trim_both.text_len();
let trimmed_line_range =
TextRange::at(offset, trim_end.text_len()).add_start(leading_whitespace);
if already_normalized {
source_text_slice(trimmed_line_range, ContainsNewlines::No).fmt(f)?;
} else {
dynamic_text(trim_both, Some(trimmed_line_range.start())).fmt(f)?;
}
}
offset += first.text_len();
// Check if we have a single line (or empty) docstring
if normalized[first.len()..].trim().is_empty() {
// For `"""\n"""` or other whitespace between the quotes, black keeps a single whitespace,
// but `""""""` doesn't get one inserted.
if needs_chaperone_space(string_part, trim_end)
|| (trim_end.is_empty() && !normalized.is_empty())
{
space().fmt(f)?;
}
string_part.preferred_quotes.fmt(f)?;
return Ok(());
}
hard_line_break().fmt(f)?;
// We know that the normalized string has \n line endings
offset += "\n".text_len();
// If some line of the docstring is less indented than the function body, we pad all lines to
// align it with the docstring statement. Conversely, if all lines are over-indented, we strip
// the extra indentation. We call this stripped indentation since it's relative to the block
// indent printer-made indentation.
let stripped_indentation = lines
.clone()
// We don't want to count whitespace-only lines as miss-indented
.filter(|line| !line.trim().is_empty())
.map(count_indentation_like_black)
.min()
.unwrap_or_default();
while let Some(line) = lines.next() {
let is_last = lines.peek().is_none();
format_docstring_line(
line,
is_last,
offset,
stripped_indentation,
already_normalized,
f,
)?;
// We know that the normalized string has \n line endings
offset += line.text_len() + "\n".text_len();
}
// Same special case in the last line as for the first line
let trim_end = normalized
.as_ref()
.trim_end_matches(|c: char| c.is_whitespace() && c != '\n');
if needs_chaperone_space(string_part, trim_end) {
space().fmt(f)?;
}
write!(
f,
[
string_part.preferred_quotes,
source_position(string_part.range.end())
]
)
}
/// If the last line of the docstring is `content" """` or `content\ """`, we need a chaperone space
/// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes,
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
fn needs_chaperone_space(string_part: &FormatStringPart, trim_end: &str) -> bool {
trim_end.ends_with(string_part.preferred_quotes.style.as_char())
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
}
/// Format a docstring line that is not the first line
fn format_docstring_line(
line: &str,
is_last: bool,
offset: TextSize,
stripped_indentation: TextSize,
already_normalized: bool,
f: &mut PyFormatter,
) -> FormatResult<()> {
let trim_end = line.trim_end();
if trim_end.is_empty() {
return if is_last {
// If the doc string ends with ` """`, the last line is ` `, but we don't want to
// insert an empty line (but close the docstring)
Ok(())
} else {
empty_line().fmt(f)
};
}
let tab_or_non_ascii_space = trim_end
.chars()
.take_while(|c| c.is_whitespace())
.any(|c| c != ' ');
if tab_or_non_ascii_space {
// We strip the indentation that is shared with the docstring statement, unless a line
// was indented less than the docstring statement, in which we strip only this much
// indentation to implicitly pad all lines by the difference, or all lines were
// overindented, in which case we strip the additional whitespace (see example in
// [`format_docstring`] doc comment). We then prepend the in-docstring indentation to the
// string.
let indent_len = count_indentation_like_black(trim_end) - stripped_indentation;
let in_docstring_indent = " ".repeat(indent_len.to_usize()) + trim_end.trim_start();
dynamic_text(&in_docstring_indent, Some(offset)).fmt(f)?;
} else {
// Take the string with the trailing whitespace removed, then also skip the leading
// whitespace
let trimmed_line_range =
TextRange::at(offset, trim_end.text_len()).add_start(stripped_indentation);
if already_normalized {
source_text_slice(trimmed_line_range, ContainsNewlines::No).fmt(f)?;
} else {
// All indents are ascii spaces, so the slicing is correct
dynamic_text(
&trim_end[stripped_indentation.to_usize()..],
Some(trimmed_line_range.start()),
)
.fmt(f)?;
}
}
// We handled the case that the closing quotes are on their own line above (the last line is
// empty except for whitespace). If they are on the same line as content, we don't insert a line
// break.
if !is_last {
hard_line_break().fmt(f)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use crate::expression::string::count_indentation_like_black;
#[test]
fn test_indentation_like_black() {
assert_eq!(count_indentation_like_black("\t \t \t").to_u32(), 24);
assert_eq!(count_indentation_like_black("\t \t").to_u32(), 24);
assert_eq!(count_indentation_like_black("\t\t\t").to_u32(), 24);
assert_eq!(count_indentation_like_black(" ").to_u32(), 4);
}
}