Use CommentRanges in backwards lexing (#7360)

## Summary

The tokenizer was split into a forward and a backwards tokenizer. The
backwards tokenizer uses the same names as the forwards ones (e.g.
`next_token`). The backwards tokenizer gets the comment ranges that we
already built to skip comments.

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
konsti 2023-09-16 05:21:45 +02:00 committed by GitHub
parent 1f6e1485f9
commit 2cbe1733c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
41 changed files with 744 additions and 628 deletions

View file

@ -8,6 +8,7 @@ use ruff_python_ast::{
Constant, Expr, ExprAttribute, ExprBinOp, ExprBoolOp, ExprCompare, ExprConstant, ExprUnaryOp,
UnaryOp,
};
use ruff_python_trivia::CommentRanges;
use ruff_python_trivia::{SimpleToken, SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{Ranged, TextRange};
@ -179,7 +180,13 @@ impl<'a> BinaryLike<'a> {
) {
let expression = operand.expression();
match expression {
Expr::BinOp(binary) if !is_expression_parenthesized(expression.into(), source) => {
Expr::BinOp(binary)
if !is_expression_parenthesized(
expression.into(),
comments.ranges(),
source,
) =>
{
let leading_comments = operand
.leading_binary_comments()
.unwrap_or_else(|| comments.leading(binary));
@ -198,7 +205,11 @@ impl<'a> BinaryLike<'a> {
);
}
Expr::Compare(compare)
if !is_expression_parenthesized(expression.into(), source) =>
if !is_expression_parenthesized(
expression.into(),
comments.ranges(),
source,
) =>
{
let leading_comments = operand
.leading_binary_comments()
@ -218,7 +229,11 @@ impl<'a> BinaryLike<'a> {
);
}
Expr::BoolOp(bool_op)
if !is_expression_parenthesized(expression.into(), source) =>
if !is_expression_parenthesized(
expression.into(),
comments.ranges(),
source,
) =>
{
let leading_comments = operand
.leading_binary_comments()
@ -282,7 +297,11 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
AnyString::from_expression(operand.expression())
.filter(|string| {
string.is_implicit_concatenated()
&& !is_expression_parenthesized(string.into(), source)
&& !is_expression_parenthesized(
string.into(),
comments.ranges(),
source,
)
})
.map(|string| (index, string, operand))
})
@ -430,6 +449,7 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
if (right_operand_has_leading_comments
&& !is_expression_parenthesized(
right_operand.expression().into(),
f.context().comments().ranges(),
f.context().source(),
))
|| right_operator.has_trailing_comments()
@ -466,11 +486,16 @@ impl Format<PyFormatContext<'_>> for BinaryLike<'_> {
}
}
fn is_simple_power_expression(left: &Expr, right: &Expr, source: &str) -> bool {
fn is_simple_power_expression(
left: &Expr,
right: &Expr,
comment_range: &CommentRanges,
source: &str,
) -> bool {
is_simple_power_operand(left)
&& is_simple_power_operand(right)
&& !is_expression_parenthesized(left.into(), source)
&& !is_expression_parenthesized(right.into(), source)
&& !is_expression_parenthesized(left.into(), comment_range, source)
&& !is_expression_parenthesized(right.into(), comment_range, source)
}
/// Return `true` if an [`Expr`] adheres to [Black's definition](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#line-breaks-binary-operators)
@ -664,6 +689,7 @@ impl Format<PyFormatContext<'_>> for FlatBinaryExpressionSlice<'_> {
&& is_simple_power_expression(
left.last_operand().expression(),
right.first_operand().expression(),
f.context().comments().ranges(),
f.context().source(),
);
@ -806,7 +832,7 @@ impl<'a> Operand<'a> {
} => !leading_comments.is_empty(),
Operand::Middle { expression } | Operand::Right { expression, .. } => {
let leading = comments.leading(*expression);
if is_expression_parenthesized((*expression).into(), source) {
if is_expression_parenthesized((*expression).into(), comments.ranges(), source) {
leading.iter().any(|comment| {
!comment.is_formatted()
&& matches!(
@ -853,7 +879,11 @@ impl Format<PyFormatContext<'_>> for Operand<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
let expression = self.expression();
return if is_expression_parenthesized(expression.into(), f.context().source()) {
return if is_expression_parenthesized(
expression.into(),
f.context().comments().ranges(),
f.context().source(),
) {
let comments = f.context().comments().clone();
let expression_comments = comments.leading_dangling_trailing(expression);

View file

@ -45,7 +45,7 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
value: Constant::Int(_) | Constant::Float(_),
..
})
) || is_expression_parenthesized(value.into(), f.context().source());
) || is_expression_parenthesized(value.into(), f.context().comments().ranges(), f.context().source());
if call_chain_layout == CallChainLayout::Fluent {
if parenthesize_value {
@ -142,15 +142,22 @@ impl NeedsParentheses for ExprAttribute {
context: &PyFormatContext,
) -> OptionalParentheses {
// Checks if there are any own line comments in an attribute chain (a.b.c).
if CallChainLayout::from_expression(self.into(), context.source())
== CallChainLayout::Fluent
if CallChainLayout::from_expression(
self.into(),
context.comments().ranges(),
context.source(),
) == CallChainLayout::Fluent
{
OptionalParentheses::Multiline
} else if context.comments().has_dangling(self) {
OptionalParentheses::Always
} else if self.value.is_name_expr() {
OptionalParentheses::BestFit
} else if is_expression_parenthesized(self.value.as_ref().into(), context.source()) {
} else if is_expression_parenthesized(
self.value.as_ref().into(),
context.comments().ranges(),
context.source(),
) {
OptionalParentheses::Never
} else {
self.value.needs_parentheses(self.into(), context)

View file

@ -82,13 +82,20 @@ impl NeedsParentheses for ExprCall {
_parent: AnyNodeRef,
context: &PyFormatContext,
) -> OptionalParentheses {
if CallChainLayout::from_expression(self.into(), context.source())
== CallChainLayout::Fluent
if CallChainLayout::from_expression(
self.into(),
context.comments().ranges(),
context.source(),
) == CallChainLayout::Fluent
{
OptionalParentheses::Multiline
} else if context.comments().has_dangling(self) {
OptionalParentheses::Always
} else if is_expression_parenthesized(self.func.as_ref().into(), context.source()) {
} else if is_expression_parenthesized(
self.func.as_ref().into(),
context.comments().ranges(),
context.source(),
) {
OptionalParentheses::Never
} else {
self.func.needs_parentheses(self.into(), context)

View file

@ -101,7 +101,11 @@ impl Format<PyFormatContext<'_>> for FormatOrElse<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
match self.orelse {
Expr::IfExp(expr)
if !is_expression_parenthesized(expr.into(), f.context().source()) =>
if !is_expression_parenthesized(
expr.into(),
f.context().comments().ranges(),
f.context().source(),
) =>
{
write!(f, [expr.format().with_options(ExprIfExpLayout::Nested)])
}

View file

@ -89,11 +89,18 @@ impl NeedsParentheses for ExprSubscript {
context: &PyFormatContext,
) -> OptionalParentheses {
{
if CallChainLayout::from_expression(self.into(), context.source())
== CallChainLayout::Fluent
if CallChainLayout::from_expression(
self.into(),
context.comments().ranges(),
context.source(),
) == CallChainLayout::Fluent
{
OptionalParentheses::Multiline
} else if is_expression_parenthesized(self.value.as_ref().into(), context.source()) {
} else if is_expression_parenthesized(
self.value.as_ref().into(),
context.comments().ranges(),
context.source(),
) {
OptionalParentheses::Never
} else {
match self.value.needs_parentheses(self.into(), context) {

View file

@ -46,7 +46,11 @@ impl FormatNodeRule<ExprUnaryOp> for FormatExprUnaryOp {
// a)
// ```
if comments.has_leading(operand.as_ref())
&& !is_expression_parenthesized(operand.as_ref().into(), f.context().source())
&& !is_expression_parenthesized(
operand.as_ref().into(),
f.context().comments().ranges(),
f.context().source(),
)
{
hard_line_break().fmt(f)?;
} else if op.is_not() {
@ -72,7 +76,11 @@ impl NeedsParentheses for ExprUnaryOp {
context: &PyFormatContext,
) -> OptionalParentheses {
// We preserve the parentheses of the operand. It should not be necessary to break this expression.
if is_expression_parenthesized(self.operand.as_ref().into(), context.source()) {
if is_expression_parenthesized(
self.operand.as_ref().into(),
context.comments().ranges(),
context.source(),
) {
OptionalParentheses::Never
} else {
OptionalParentheses::Multiline

View file

@ -9,6 +9,7 @@ use ruff_python_ast as ast;
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::visitor::preorder::{walk_expr, PreorderVisitor};
use ruff_python_ast::{Constant, Expr, ExpressionRef, Operator};
use ruff_python_trivia::CommentRanges;
use crate::builders::parenthesize_if_expands;
use crate::comments::leading_comments;
@ -103,9 +104,11 @@ impl FormatRule<Expr, PyFormatContext<'_>> for FormatExpr {
});
let parenthesize = match parentheses {
Parentheses::Preserve => {
is_expression_parenthesized(expression.into(), f.context().source())
}
Parentheses::Preserve => is_expression_parenthesized(
expression.into(),
f.context().comments().ranges(),
f.context().source(),
),
Parentheses::Always => true,
// Fluent style means we already have parentheses
Parentheses::Never => false,
@ -186,7 +189,11 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
let comments = f.context().comments();
let preserve_parentheses = parenthesize.is_optional()
&& is_expression_parenthesized((*expression).into(), f.context().source());
&& is_expression_parenthesized(
(*expression).into(),
f.context().comments().ranges(),
f.context().source(),
);
let has_comments =
comments.has_leading(*expression) || comments.has_trailing_own_line(*expression);
@ -581,7 +588,11 @@ impl<'input> PreorderVisitor<'input> for CanOmitOptionalParenthesesVisitor<'inpu
self.last = Some(expr);
// Rule only applies for non-parenthesized expressions.
if is_expression_parenthesized(expr.into(), self.context.source()) {
if is_expression_parenthesized(
expr.into(),
self.context.comments().ranges(),
self.context.source(),
) {
self.any_parenthesized_expressions = true;
} else {
self.visit_subexpression(expr);
@ -635,7 +646,11 @@ pub enum CallChainLayout {
}
impl CallChainLayout {
pub(crate) fn from_expression(mut expr: ExpressionRef, source: &str) -> Self {
pub(crate) fn from_expression(
mut expr: ExpressionRef,
comment_ranges: &CommentRanges,
source: &str,
) -> Self {
let mut attributes_after_parentheses = 0;
loop {
match expr {
@ -646,7 +661,7 @@ impl CallChainLayout {
// data[:100].T
// ^^^^^^^^^^ value
// ```
if is_expression_parenthesized(value.into(), source) {
if is_expression_parenthesized(value.into(), comment_ranges, source) {
// `(a).b`. We preserve these parentheses so don't recurse
attributes_after_parentheses += 1;
break;
@ -674,7 +689,7 @@ impl CallChainLayout {
// f2 = (a).w().t(1,)
// ^ expr
// ```
if is_expression_parenthesized(expr, source) {
if is_expression_parenthesized(expr, comment_ranges, source) {
attributes_after_parentheses += 1;
}
@ -683,7 +698,7 @@ impl CallChainLayout {
}
// We preserve these parentheses so don't recurse
if is_expression_parenthesized(expr, source) {
if is_expression_parenthesized(expr, comment_ranges, source) {
break;
}
}
@ -704,7 +719,11 @@ impl CallChainLayout {
match self {
CallChainLayout::Default => {
if f.context().node_level().is_parenthesized() {
CallChainLayout::from_expression(item.into(), f.context().source())
CallChainLayout::from_expression(
item.into(),
f.context().comments().ranges(),
f.context().source(),
)
} else {
CallChainLayout::NonFluent
}
@ -745,7 +764,7 @@ fn has_parentheses(expr: &Expr, context: &PyFormatContext) -> Option<OwnParenthe
// Otherwise, if the node lacks parentheses (e.g., `(1)`) or only contains empty parentheses
// (e.g., `([])`), we need to check for surrounding parentheses.
if is_expression_parenthesized(expr.into(), context.source()) {
if is_expression_parenthesized(expr.into(), context.comments().ranges(), context.source()) {
return Some(OwnParentheses::NonEmpty);
}

View file

@ -2,7 +2,10 @@ use ruff_formatter::prelude::tag::Condition;
use ruff_formatter::{format_args, write, Argument, Arguments, FormatContext, FormatOptions};
use ruff_python_ast::node::AnyNodeRef;
use ruff_python_ast::ExpressionRef;
use ruff_python_trivia::{first_non_trivia_token, SimpleToken, SimpleTokenKind, SimpleTokenizer};
use ruff_python_trivia::CommentRanges;
use ruff_python_trivia::{
first_non_trivia_token, BackwardsTokenizer, SimpleToken, SimpleTokenKind,
};
use ruff_text_size::Ranged;
use crate::comments::{
@ -100,7 +103,11 @@ pub enum Parentheses {
Never,
}
pub(crate) fn is_expression_parenthesized(expr: ExpressionRef, contents: &str) -> bool {
pub(crate) fn is_expression_parenthesized(
expr: ExpressionRef,
comment_ranges: &CommentRanges,
contents: &str,
) -> bool {
// First test if there's a closing parentheses because it tends to be cheaper.
if matches!(
first_non_trivia_token(expr.end(), contents),
@ -109,11 +116,10 @@ pub(crate) fn is_expression_parenthesized(expr: ExpressionRef, contents: &str) -
..
})
) {
let mut tokenizer =
SimpleTokenizer::up_to_without_back_comment(expr.start(), contents).skip_trivia();
matches!(
tokenizer.next_back(),
BackwardsTokenizer::up_to(expr.start(), contents, comment_ranges)
.skip_trivia()
.next(),
Some(SimpleToken {
kind: SimpleTokenKind::LParen,
..
@ -418,6 +424,7 @@ impl Format<PyFormatContext<'_>> for FormatEmptyParenthesized<'_> {
mod tests {
use ruff_python_ast::ExpressionRef;
use ruff_python_parser::parse_expression;
use ruff_python_trivia::CommentRanges;
use crate::expression::parentheses::is_expression_parenthesized;
@ -427,6 +434,7 @@ mod tests {
let expr = parse_expression(expression, "<filename>").unwrap();
assert!(!is_expression_parenthesized(
ExpressionRef::from(&expr),
&CommentRanges::default(),
expression
));
}