ruff_python_formatter: implement "dynamic" line width mode for docstring code formatting (#9098)

## Summary

This PR changes the internal `docstring-code-line-width` setting to
additionally accept a string value `dynamic`. When `dynamic` is set, the
line width is dynamically adjusted when reformatting code snippets in
docstrings based on the indent level of the docstring. The result is
that the reformatted lines from the code snippet should not exceed the
"global" line width configuration for the surrounding source.

This PR does not change the default behavior, although I suspect the
default should probably be `dynamic`.

## Test Plan

I added a new configuration to the existing docstring code tests and
also added a new set of tests dedicated to the new `dynamic` mode.
This commit is contained in:
Andrew Gallant 2023-12-12 09:58:07 -05:00 committed by GitHub
parent 5559827a78
commit b972455ac7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 3394 additions and 12 deletions

View file

@ -44,5 +44,11 @@
"docstring_code_line_width": 60,
"indent_style": "space",
"indent_width": 4
},
{
"docstring_code": "enabled",
"docstring_code_line_width": "dynamic",
"indent_style": "space",
"indent_width": 4
}
]

View file

@ -0,0 +1,26 @@
[
{
"docstring_code": "enabled",
"docstring_code_line_width": "dynamic",
"indent_style": "space",
"indent_width": 4
},
{
"docstring_code": "enabled",
"docstring_code_line_width": "dynamic",
"indent_style": "space",
"indent_width": 2
},
{
"docstring_code": "enabled",
"docstring_code_line_width": "dynamic",
"indent_style": "tab",
"indent_width": 4
},
{
"docstring_code": "enabled",
"docstring_code_line_width": "dynamic",
"indent_style": "tab",
"indent_width": 8
}
]

View file

@ -0,0 +1,172 @@
def simple():
"""
First line.
```py
class Abcdefghijklmopqrstuvwxyz(Abc, Def, Ghi, Jkl, Mno, Pqr, Stu, Vwx, Yz, A1, A2, A3, A4, A5):
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
```
Done.
"""
pass
# Like simple, but we double everything up to ensure the indent level is
# tracked correctly.
def repeated():
"""
First line.
```py
class Abcdefghijklmopqrstuvwxyz(Abc, Def, Ghi, Jkl, Mno, Pqr, Stu, Vwx, Yz, A1, A2, A3, A4, A5):
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
class Abcdefghijklmopqrstuvwxyz(Abc, Def, Ghi, Jkl, Mno, Pqr, Stu, Vwx, Yz, A1, A2, A3, A4, A5):
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
```
Done.
"""
pass
# Like simple, but we make one line exactly one character longer than the limit
# (for 4-space indents) and make sure it gets wrapped.
def barely_exceeds_limit():
"""
First line.
```py
class Abcdefghijklmopqrstuvwxyz(Abc, Def, Ghi, Jkl, Mno, Pqr, Stu, Vwx, Yz, A1, A2, A3, A4, A5):
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is 89 columns, which is one
# more than the limit. Therefore, it should get wrapped for
# indent_width >= 4.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a5678)
return 5
self.x = doit( 5 )
```
Done.
"""
pass
# This tests that if the code block is unindented, that it gets indented and
# the dynamic line width setting is applied correctly.
def unindented():
"""
First line.
```py
class Abcdefghijklmopqrstuvwxyz(Abc, Def, Ghi, Jkl, Mno, Pqr, Stu, Vwx, Yz, A1, A2, A3, A4, A5):
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is just one character shy of
# tripping the default line width of 88. So it should not be
# wrapped.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a567)
return 5
self.x = doit( 5 )
```
Done.
"""
pass
# Like unindented, but contains a `print` line where it just barely exceeds the
# globally configured line width *after* its indentation has been corrected.
def unindented_barely_exceeds_limit():
"""
First line.
```py
class Abcdefghijklmopqrstuvwxyz(Abc, Def, Ghi, Jkl, Mno, Pqr, Stu, Vwx, Yz, A1, A2, A3, A4, A5):
def abcdefghijklmnopqrstuvwxyz(self, abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
def abcdefghijklmnopqrstuvwxyz(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4):
# For 4 space indents, this is 89 columns, which is one
# more than the limit. Therefore, it should get wrapped for
# indent_width >= 4.
print(abc, ddef, ghi, jkl, mno, pqr, stu, vwx, yz, a1, a2, a3, a4, a5678)
return 5
self.x = doit( 5 )
```
Done.
"""
pass

View file

@ -1,7 +1,7 @@
use crate::comments::Comments;
use crate::expression::string::QuoteChar;
use crate::PyFormatOptions;
use ruff_formatter::{Buffer, FormatContext, GroupId, SourceCode};
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_source_file::Locator;
use std::fmt::{Debug, Formatter};
use std::ops::{Deref, DerefMut};
@ -12,6 +12,7 @@ pub struct PyFormatContext<'a> {
contents: &'a str,
comments: Comments<'a>,
node_level: NodeLevel,
indent_level: IndentLevel,
/// Set to a non-None value when the formatter is running on a code
/// snippet within a docstring. The value should be the quote character of the
/// docstring containing the code snippet.
@ -30,6 +31,7 @@ impl<'a> PyFormatContext<'a> {
contents,
comments,
node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other),
indent_level: IndentLevel::new(0),
docstring: None,
}
}
@ -51,6 +53,14 @@ impl<'a> PyFormatContext<'a> {
self.node_level
}
/// Overwrites the indent level tracked by this context with `level`.
pub(crate) fn set_indent_level(&mut self, level: IndentLevel) {
self.indent_level = level;
}
/// Returns the indent level currently tracked by this context.
pub(crate) fn indent_level(&self) -> IndentLevel {
self.indent_level
}
pub(crate) fn comments(&self) -> &Comments<'a> {
&self.comments
}
@ -210,3 +220,115 @@ where
.set_node_level(self.saved_level);
}
}
/// The current indent level of the formatter.
///
/// One can determine the width of the indent itself (in number of ASCII
/// space characters) by multiplying the indent level by the configured indent
/// width.
///
/// This is specifically used inside the docstring code formatter for
/// implementing its "dynamic" line width mode. Namely, in the nested call to
/// the formatter, when "dynamic" mode is enabled, the line width is set to
/// `max(1, line_width - indent_level * indent_width)`, where `line_width` in
/// this context is the global line width setting. The subtraction saturates,
/// so the resulting width is never smaller than 1.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) struct IndentLevel {
/// The numeric level. It is incremented for every whole indent in Python
/// source code.
///
/// Note that the first indentation level is actually 1, since this starts
/// at 0 and is incremented when the first top-level statement is seen. So
/// even though the first top-level statement in Python source will have no
/// indentation, its indentation level is 1.
level: u16,
}
impl IndentLevel {
/// Returns a new indent level for the given value.
pub(crate) fn new(level: u16) -> IndentLevel {
IndentLevel { level }
}
/// Returns the next indent level.
pub(crate) fn increment(self) -> IndentLevel {
IndentLevel {
level: self.level.saturating_add(1),
}
}
/// Convert this indent level into a specific number of ASCII whitespace
/// characters based on the given indent width.
pub(crate) fn to_ascii_spaces(self, width: IndentWidth) -> u16 {
let width = u16::try_from(width.value()).unwrap_or(u16::MAX);
// Why the subtraction? IndentLevel starts at 0 and asks for the "next"
// indent level before seeing the first top-level statement. So it's
// always 1 more than what we expect it to be.
let level = self.level.saturating_sub(1);
width.saturating_mul(level)
}
}
/// Change the [`IndentLevel`] of the formatter for the lifetime of this
/// struct.
///
/// The level that was active when the struct was created is remembered and
/// written back to the context when the struct is dropped.
pub(crate) struct WithIndentLevel<'a, B, D>
where
D: DerefMut<Target = B>,
B: Buffer<Context = PyFormatContext<'a>>,
{
// The wrapped buffer (or a mutable handle that dereferences to it).
buffer: D,
// The indent level that was in effect before this guard replaced it.
saved_level: IndentLevel,
}
impl<'a, B, D> WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Installs `level` as the indent level of the context behind `buffer`
    /// and returns a guard that remembers the level it replaced.
    pub(crate) fn new(level: IndentLevel, mut buffer: D) -> Self {
        // Swap the level inside a short scope so the mutable borrow of
        // `buffer` ends before `buffer` is moved into the guard.
        let saved_level = {
            let context = buffer.state_mut().context_mut();
            let previous = context.indent_level();
            context.set_indent_level(level);
            previous
        };

        Self {
            buffer,
            saved_level,
        }
    }
}
impl<'a, B, D> Deref for WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    type Target = B;

    /// Gives shared access to the wrapped buffer.
    fn deref(&self) -> &Self::Target {
        // Explicit reborrow through `D`'s own `Deref` impl.
        &*self.buffer
    }
}
impl<'a, B, D> DerefMut for WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Gives exclusive access to the wrapped buffer.
    fn deref_mut(&mut self) -> &mut Self::Target {
        // Explicit reborrow through `D`'s own `DerefMut` impl.
        &mut *self.buffer
    }
}
impl<'a, B, D> Drop for WithIndentLevel<'a, B, D>
where
    D: DerefMut<Target = B>,
    B: Buffer<Context = PyFormatContext<'a>>,
{
    /// Writes the remembered indent level back into the context.
    fn drop(&mut self) {
        // Copy the saved level out first so the mutable borrow of the
        // buffer below does not overlap with a read of `self`.
        let restored = self.saved_level;
        let context = self.buffer.state_mut().context_mut();
        context.set_indent_level(restored);
    }
}

View file

@ -7,13 +7,13 @@ use std::{borrow::Cow, collections::VecDeque};
use {once_cell::sync::Lazy, regex::Regex};
use {
ruff_formatter::{write, IndentStyle, Printed},
ruff_formatter::{write, FormatOptions, IndentStyle, LineWidth, Printed},
ruff_python_trivia::{is_python_whitespace, PythonWhitespace},
ruff_source_file::Locator,
ruff_text_size::{Ranged, TextLen, TextRange, TextSize},
};
use crate::{prelude::*, FormatModuleError};
use crate::{prelude::*, DocstringCodeLineWidth, FormatModuleError};
use super::{NormalizedString, QuoteChar};
@ -460,11 +460,22 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> {
.map(|line| line.code)
.collect::<Vec<&str>>()
.join("\n");
let line_width = match self.f.options().docstring_code_line_width() {
DocstringCodeLineWidth::Fixed(width) => width,
DocstringCodeLineWidth::Dynamic => {
let global_line_width = self.f.options().line_width().value();
let indent_width = self.f.options().indent_width();
let indent_level = self.f.context().indent_level();
let current_indent = indent_level.to_ascii_spaces(indent_width);
let width = std::cmp::max(1, global_line_width.saturating_sub(current_indent));
LineWidth::try_from(width).expect("width is capped at a minimum of 1")
}
};
let options = self
.f
.options()
.clone()
.with_line_width(self.f.options().docstring_code_line_width())
.with_line_width(line_width)
// It's perhaps a little odd to be hard-coding the indent
// style here, but I believe it is necessary as a result
// of the whitespace normalization otherwise done in

View file

@ -16,7 +16,8 @@ use crate::comments::{
};
pub use crate::context::PyFormatContext;
pub use crate::options::{
DocstringCode, MagicTrailingComma, PreviewMode, PyFormatOptions, QuoteStyle,
DocstringCode, DocstringCodeLineWidth, MagicTrailingComma, PreviewMode, PyFormatOptions,
QuoteStyle,
};
pub use crate::shared_traits::{AsFormat, FormattedIter, FormattedIterExt, IntoFormat};
use crate::verbatim::suppressed_node;

View file

@ -52,8 +52,7 @@ pub struct PyFormatOptions {
/// The preferred line width at which the formatter should wrap lines in
/// docstring code examples. This only has an impact when `docstring_code`
/// is enabled.
#[cfg_attr(feature = "serde", serde(default = "default_line_width"))]
docstring_code_line_width: LineWidth,
docstring_code_line_width: DocstringCodeLineWidth,
/// Whether preview style formatting is enabled or not
preview: PreviewMode,
@ -83,7 +82,7 @@ impl Default for PyFormatOptions {
magic_trailing_comma: MagicTrailingComma::default(),
source_map_generation: SourceMapGeneration::default(),
docstring_code: DocstringCode::default(),
docstring_code_line_width: default_line_width(),
docstring_code_line_width: DocstringCodeLineWidth::default(),
preview: PreviewMode::default(),
}
}
@ -126,7 +125,7 @@ impl PyFormatOptions {
self.docstring_code
}
pub fn docstring_code_line_width(&self) -> LineWidth {
pub fn docstring_code_line_width(&self) -> DocstringCodeLineWidth {
self.docstring_code_line_width
}
@ -302,3 +301,50 @@ impl DocstringCode {
matches!(self, DocstringCode::Enabled)
}
}
/// The line width to use when reformatting code snippets found in
/// docstrings.
///
/// With the `serde` feature enabled this is deserialized untagged: either as
/// a plain line width value (`Fixed`) or as the string `dynamic` (`Dynamic`).
#[derive(Copy, Clone, Eq, PartialEq, CacheKey)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "lowercase"))]
#[cfg_attr(feature = "serde", serde(untagged))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub enum DocstringCodeLineWidth {
/// Wrap docstring code snippets at the given fixed line width.
Fixed(LineWidth),
/// Derive the line width from the global line width minus the current
/// indentation, so that reformatted snippet lines should not exceed the
/// global line width of the surrounding source.
#[cfg_attr(
feature = "serde",
serde(deserialize_with = "deserialize_docstring_code_line_width_dynamic")
)]
Dynamic,
}
impl Default for DocstringCodeLineWidth {
    /// Defaults to a fixed width, using the value returned by
    /// `default_line_width()`.
    fn default() -> Self {
        Self::Fixed(default_line_width())
    }
}
impl std::fmt::Debug for DocstringCodeLineWidth {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match *self {
DocstringCodeLineWidth::Fixed(v) => v.value().fmt(f),
DocstringCodeLineWidth::Dynamic => "dynamic".fmt(f),
}
}
}
/// Responsible for deserializing the `DocstringCodeLineWidth::Dynamic`
/// variant.
///
/// Reads a string from `d` and succeeds only when it is exactly `dynamic`.
///
/// # Errors
///
/// Returns the deserializer's error if the input is not a string, or an
/// "invalid value" error naming `dynamic` as the expected value if the
/// string is anything else.
// Only referenced from a `cfg_attr(feature = "serde", ...)` attribute and it
// names `serde` paths directly, so it must be gated on the same feature;
// otherwise builds without `serde` cannot resolve the crate.
#[cfg(feature = "serde")]
fn deserialize_docstring_code_line_width_dynamic<'de, D>(d: D) -> Result<(), D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::{de::Error, Deserialize};

    let value = String::deserialize(d)?;
    match &*value {
        "dynamic" => Ok(()),
        s => Err(D::Error::invalid_value(
            serde::de::Unexpected::Str(s),
            &"dynamic",
        )),
    }
}

View file

@ -8,7 +8,7 @@ use ruff_text_size::{Ranged, TextRange};
use crate::comments::{
leading_comments, trailing_comments, Comments, LeadingDanglingTrailingComments,
};
use crate::context::{NodeLevel, TopLevelStatementPosition, WithNodeLevel};
use crate::context::{NodeLevel, TopLevelStatementPosition, WithIndentLevel, WithNodeLevel};
use crate::expression::string::StringLayout;
use crate::prelude::*;
use crate::statement::stmt_expr::FormatStmtExpr;
@ -71,7 +71,8 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
let source = f.context().source();
let source_type = f.options().source_type();
let f = &mut WithNodeLevel::new(node_level, f);
let f = WithNodeLevel::new(node_level, f);
let f = &mut WithIndentLevel::new(f.context().indent_level().increment(), f);
// Format the first statement in the body, which often has special formatting rules.
let first = match self.kind {

View file

@ -363,7 +363,7 @@ preview = {preview:?}"#,
line_ending = self.0.line_ending(),
magic_trailing_comma = self.0.magic_trailing_comma(),
docstring_code = self.0.docstring_code(),
docstring_code_line_width = self.0.docstring_code_line_width().value(),
docstring_code_line_width = self.0.docstring_code_line_width(),
preview = self.0.preview()
)
}