ruff/crates/ruff_python_formatter/src/expression/expr_attribute.rs
Dhruv Manilawala 230c9ce236
Split Constant to individual literal nodes (#8064)
## Summary

This PR splits the `Constant` enum as individual literal nodes. It
introduces the following new nodes for each variant:
* `ExprStringLiteral`
* `ExprBytesLiteral`
* `ExprNumberLiteral`
* `ExprBooleanLiteral`
* `ExprNoneLiteral`
* `ExprEllipsisLiteral`

The main motivation behind this refactor is to introduce the new AST
node for implicit string concatenation in the coming PR. The elements of
that node will be either a string literal, bytes literal or a f-string
which can be implemented using an enum. This means that a string or
bytes literal cannot be represented by `Constant::Str` /
`Constant::Bytes` which creates an inconsistency.

This PR avoids that inconsistency by splitting the constant nodes into
it's own literal nodes, literal being the more appropriate naming
convention from a static analysis tool perspective.

This also makes working with literals in the linter and formatter much
more ergonomic like, for example, if one would want to check if this is
a string literal, it can be done easily using
`Expr::is_string_literal_expr` or matching against `Expr::StringLiteral`
as oppose to matching against the `ExprConstant` and enum `Constant`. A
few AST helper methods can be simplified as well which will be done in a
follow-up PR.

This introduces a new `Expr::is_literal_expr` method which is the same
as `Expr::is_constant_expr`. There are also intermediary changes related
to implicit string concatenation which are quiet less. This is done so
as to avoid having a huge PR which this already is.

## Test Plan

1. Verify and update all of the existing snapshots (parser, visitor)
2. Verify that the ecosystem check output remains **unchanged** for both
the linter and formatter

### Formatter ecosystem check

#### `main`

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |

#### `dhruv/constant-to-literal`

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |
2023-10-30 12:13:23 +05:30

185 lines
6.4 KiB
Rust

use ruff_formatter::{write, FormatRuleWithOptions};
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::{Expr, ExprAttribute, ExprNumberLiteral, Number};
use ruff_python_trivia::{find_only_token_in_range, SimpleTokenKind};
use ruff_text_size::{Ranged, TextRange};
use crate::comments::{dangling_comments, SourceComment};
use crate::expression::parentheses::{
is_expression_parenthesized, NeedsParentheses, OptionalParentheses, Parentheses,
};
use crate::expression::CallChainLayout;
use crate::prelude::*;
#[derive(Default)]
pub struct FormatExprAttribute {
call_chain_layout: CallChainLayout,
}
impl FormatRuleWithOptions<ExprAttribute, PyFormatContext<'_>> for FormatExprAttribute {
type Options = CallChainLayout;
fn with_options(mut self, options: Self::Options) -> Self {
self.call_chain_layout = options;
self
}
}
impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
fn fmt_fields(&self, item: &ExprAttribute, f: &mut PyFormatter) -> FormatResult<()> {
let ExprAttribute {
value,
range: _,
attr,
ctx: _,
} = item;
let call_chain_layout = self.call_chain_layout.apply_in_node(item, f);
let format_inner = format_with(|f: &mut PyFormatter| {
let parenthesize_value =
is_base_ten_number_literal(value.as_ref(), f.context().source()) || {
is_expression_parenthesized(
value.into(),
f.context().comments().ranges(),
f.context().source(),
)
};
if call_chain_layout == CallChainLayout::Fluent {
if parenthesize_value {
// Don't propagate the call chain layout.
value.format().with_options(Parentheses::Always).fmt(f)?;
// Format the dot on its own line.
soft_line_break().fmt(f)?;
} else {
match value.as_ref() {
Expr::Attribute(expr) => {
expr.format().with_options(call_chain_layout).fmt(f)?;
}
Expr::Call(expr) => {
expr.format().with_options(call_chain_layout).fmt(f)?;
soft_line_break().fmt(f)?;
}
Expr::Subscript(expr) => {
expr.format().with_options(call_chain_layout).fmt(f)?;
soft_line_break().fmt(f)?;
}
_ => {
value.format().with_options(Parentheses::Never).fmt(f)?;
}
}
}
} else if parenthesize_value {
value.format().with_options(Parentheses::Always).fmt(f)?;
} else {
value.format().with_options(Parentheses::Never).fmt(f)?;
}
// Identify dangling comments before and after the dot:
// ```python
// (
// (
// a
// ) # `before_dot`
// # `before_dot`
// . # `after_dot`
// # `after_dot`
// b
// )
// ```
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);
let (before_dot, after_dot) = if dangling.is_empty() {
(dangling, dangling)
} else {
let dot_token = find_only_token_in_range(
TextRange::new(item.value.end(), item.attr.start()),
SimpleTokenKind::Dot,
f.context().source(),
);
dangling.split_at(
dangling.partition_point(|comment| comment.start() < dot_token.start()),
)
};
write!(
f,
[
dangling_comments(before_dot),
token("."),
dangling_comments(after_dot),
attr.format()
]
)
});
let is_call_chain_root = self.call_chain_layout == CallChainLayout::Default
&& call_chain_layout == CallChainLayout::Fluent;
if is_call_chain_root {
write!(f, [group(&format_inner)])
} else {
write!(f, [format_inner])
}
}
fn fmt_dangling_comments(
&self,
_dangling_comments: &[SourceComment],
_f: &mut PyFormatter,
) -> FormatResult<()> {
// handle in `fmt_fields`
Ok(())
}
}
impl NeedsParentheses for ExprAttribute {
fn needs_parentheses(
&self,
_parent: AnyNodeRef,
context: &PyFormatContext,
) -> OptionalParentheses {
// Checks if there are any own line comments in an attribute chain (a.b.c).
if CallChainLayout::from_expression(
self.into(),
context.comments().ranges(),
context.source(),
) == CallChainLayout::Fluent
{
OptionalParentheses::Multiline
} else if context.comments().has_dangling(self) {
OptionalParentheses::Always
} else if self.value.is_name_expr() {
OptionalParentheses::BestFit
} else if is_expression_parenthesized(
self.value.as_ref().into(),
context.comments().ranges(),
context.source(),
) {
OptionalParentheses::Never
} else {
self.value.needs_parentheses(self.into(), context)
}
}
}
// Non Hex, octal or binary number literals need parentheses to disambiguate the attribute `.` from
// a decimal point. Floating point numbers don't strictly need parentheses but it reads better (rather than 0.0.test()).
fn is_base_ten_number_literal(expr: &Expr, source: &str) -> bool {
if let Some(ExprNumberLiteral { value, range }) = expr.as_number_literal_expr() {
match value {
Number::Float(_) => true,
Number::Int(_) => {
let text = &source[*range];
!matches!(
text.as_bytes().get(0..2),
Some([b'0', b'x' | b'X' | b'o' | b'O' | b'b' | b'B'])
)
}
Number::Complex { .. } => false,
}
} else {
false
}
}