Introduce ArgOrKeyword to keep call parameter order (#7302)

## Motivation

The `ast::Arguments` for call argument are split into positional
arguments (args) and keywords arguments (keywords). We currently assume
that call consists of first args and then keywords, which is generally
the case, but not always:

```python
f(*args, a=2, *args2, **kwargs)

class A(*args, a=2, *args2, **kwargs):
    pass
```

The consequence is accidentally reordering arguments
(https://github.com/astral-sh/ruff/pull/7268).

## Summary

`Arguments::args_and_keywords` returns an iterator of an `ArgOrKeyword`
enum that yields args and keywords in the correct order. I've fixed the
obvious `args` and `keywords` usages, but there might be some cases with
wrong assumptions remaining.

## Test Plan

The generator got new test cases, otherwise the stacked PR
(https://github.com/astral-sh/ruff/pull/7268) which uncovered this.
This commit is contained in:
konsti 2023-09-13 10:45:46 +02:00 committed by GitHub
parent 179128dc54
commit 56440ad835
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 133 additions and 49 deletions

1
Cargo.lock generated
View file

@ -2304,6 +2304,7 @@ dependencies = [
"bitflags 2.4.0",
"insta",
"is-macro",
"itertools",
"memchr",
"num-bigint",
"num-traits",

View file

@ -3,7 +3,7 @@
use anyhow::{Context, Result};
use ruff_diagnostics::Edit;
use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Expr, Keyword, Stmt};
use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Stmt};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_trivia::{
@ -92,10 +92,8 @@ pub(crate) fn remove_argument<T: Ranged>(
) -> Result<Edit> {
// Partition into arguments before and after the argument to remove.
let (before, after): (Vec<_>, Vec<_>) = arguments
.args
.iter()
.map(Expr::range)
.chain(arguments.keywords.iter().map(Keyword::range))
.arguments_source_order()
.map(|arg| arg.range())
.filter(|range| argument.range() != *range)
.partition(|range| range.start() < argument.start());

View file

@ -19,6 +19,7 @@ ruff_text_size = { path = "../ruff_text_size" }
bitflags = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
memchr = { workspace = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }

View file

@ -207,6 +207,8 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool {
range: _,
}) => {
any_over_expr(call_func, func)
// Note that this is the evaluation order but not necessarily the declaration order
// (e.g. for `f(*args, a=2, *args2, **kwargs)` it's not)
|| args.iter().any(|expr| any_over_expr(expr, func))
|| keywords
.iter()
@ -347,6 +349,8 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool {
decorator_list,
..
}) => {
// Note that e.g. `class A(*args, a=2, *args2, **kwargs): pass` is a valid class
// definition
arguments
.as_deref()
.is_some_and(|Arguments { args, keywords, .. }| {

View file

@ -1,9 +1,9 @@
use crate::visitor::preorder::PreorderVisitor;
use crate::{
self as ast, Alias, Arguments, Comprehension, Decorator, ExceptHandler, Expr, Keyword,
MatchCase, Mod, Parameter, ParameterWithDefault, Parameters, Pattern, PatternArguments,
PatternKeyword, Stmt, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
TypeParams, WithItem,
self as ast, Alias, ArgOrKeyword, Arguments, Comprehension, Decorator, ExceptHandler, Expr,
Keyword, MatchCase, Mod, Parameter, ParameterWithDefault, Parameters, Pattern,
PatternArguments, PatternKeyword, Stmt, TypeParam, TypeParamParamSpec, TypeParamTypeVar,
TypeParamTypeVarTuple, TypeParams, WithItem,
};
use ruff_text_size::{Ranged, TextRange};
use std::ptr::NonNull;
@ -3549,18 +3549,11 @@ impl AstNode for Arguments {
where
V: PreorderVisitor<'a> + ?Sized,
{
let ast::Arguments {
range: _,
args,
keywords,
} = self;
for arg in args {
visitor.visit_expr(arg);
for arg_or_keyword in self.arguments_source_order() {
match arg_or_keyword {
ArgOrKeyword::Arg(arg) => visitor.visit_expr(arg),
ArgOrKeyword::Keyword(keyword) => visitor.visit_keyword(keyword),
}
for keyword in keywords {
visitor.visit_keyword(keyword);
}
}
}

View file

@ -1,5 +1,6 @@
#![allow(clippy::derive_partial_eq_without_eq)]
use itertools::Itertools;
use std::fmt;
use std::fmt::Debug;
use std::ops::Deref;
@ -2177,6 +2178,34 @@ pub struct Arguments {
pub keywords: Vec<Keyword>,
}
/// An entry in the argument list of a function call.
#[derive(Clone, Debug, PartialEq)]
pub enum ArgOrKeyword<'a> {
Arg(&'a Expr),
Keyword(&'a Keyword),
}
impl<'a> From<&'a Expr> for ArgOrKeyword<'a> {
fn from(arg: &'a Expr) -> Self {
Self::Arg(arg)
}
}
impl<'a> From<&'a Keyword> for ArgOrKeyword<'a> {
fn from(keyword: &'a Keyword) -> Self {
Self::Keyword(keyword)
}
}
impl Ranged for ArgOrKeyword<'_> {
fn range(&self) -> TextRange {
match self {
Self::Arg(arg) => arg.range(),
Self::Keyword(keyword) => keyword.range(),
}
}
}
impl Arguments {
/// Return the number of positional and keyword arguments.
pub fn len(&self) -> usize {
@ -2212,6 +2241,46 @@ impl Arguments {
.map(|keyword| &keyword.value)
.or_else(|| self.find_positional(position))
}
/// Return the positional and keyword arguments in the order of declaration.
///
/// Positional arguments are generally before keyword arguments, but star arguments are an
/// exception:
/// ```python
/// class A(*args, a=2, *args2, **kwargs):
/// pass
///
/// f(*args, a=2, *args2, **kwargs)
/// ```
/// where `*args` and `args2` are `args` while `a=1` and `kwargs` are `keywords`.
///
/// If you would just chain `args` and `keywords` the call would get reordered which we don't
/// want. This function instead "merge sorts" them into the correct order.
///
/// Note that the order of evaluation is always first `args`, then `keywords`:
/// ```python
/// def f(*args, **kwargs):
/// pass
///
/// def g(x):
/// print(x)
/// return x
///
///
/// f(*g([1]), a=g(2), *g([3]), **g({"4": 5}))
/// ```
/// Output:
/// ```text
/// [1]
/// [3]
/// 2
/// {'4': 5}
/// ```
pub fn arguments_source_order(&self) -> impl Iterator<Item = ArgOrKeyword<'_>> {
let args = self.args.iter().map(ArgOrKeyword::Arg);
let keywords = self.keywords.iter().map(ArgOrKeyword::Keyword);
args.merge_by(keywords, |left, right| left.start() < right.start())
}
}
/// An AST node used to represent a sequence of type parameters.

View file

@ -573,6 +573,9 @@ pub fn walk_format_spec<'a, V: Visitor<'a> + ?Sized>(visitor: &mut V, format_spe
}
pub fn walk_arguments<'a, V: Visitor<'a> + ?Sized>(visitor: &mut V, arguments: &'a Arguments) {
// Note that the there might be keywords before the last arg, e.g. in
// f(*args, a=2, *args2, **kwargs)`, but we follow Python in evaluating first `args` and then
// `keywords`. See also [Arguments::arguments_as_declared`].
for arg in &arguments.args {
visitor.visit_expr(arg);
}

View file

@ -3,9 +3,10 @@
use std::ops::Deref;
use ruff_python_ast::{
self as ast, Alias, BoolOp, CmpOp, Comprehension, Constant, ConversionFlag, DebugText,
ExceptHandler, Expr, Identifier, MatchCase, Operator, Parameter, Parameters, Pattern, Stmt,
Suite, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
self as ast, Alias, ArgOrKeyword, BoolOp, CmpOp, Comprehension, Constant, ConversionFlag,
DebugText, ExceptHandler, Expr, Identifier, MatchCase, Operator, Parameter, Parameters,
Pattern, Stmt, Suite, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
WithItem,
};
use ruff_python_ast::{ParameterWithDefault, TypeParams};
use ruff_python_literal::escape::{AsciiEscape, Escape, UnicodeEscape};
@ -265,11 +266,13 @@ impl<'a> Generator<'a> {
if let Some(arguments) = arguments {
self.p("(");
let mut first = true;
for base in &arguments.args {
for arg_or_keyword in arguments.arguments_source_order() {
match arg_or_keyword {
ArgOrKeyword::Arg(arg) => {
self.p_delim(&mut first, ", ");
self.unparse_expr(base, precedence::MAX);
self.unparse_expr(arg, precedence::MAX);
}
for keyword in &arguments.keywords {
ArgOrKeyword::Keyword(keyword) => {
self.p_delim(&mut first, ", ");
if let Some(arg) = &keyword.arg {
self.p_id(arg);
@ -279,6 +282,8 @@ impl<'a> Generator<'a> {
}
self.unparse_expr(&keyword.value, precedence::MAX);
}
}
}
self.p(")");
}
self.p(":");
@ -1045,19 +1050,24 @@ impl<'a> Generator<'a> {
self.unparse_comp(generators);
} else {
let mut first = true;
for arg in &arguments.args {
for arg_or_keyword in arguments.arguments_source_order() {
match arg_or_keyword {
ArgOrKeyword::Arg(arg) => {
self.p_delim(&mut first, ", ");
self.unparse_expr(arg, precedence::COMMA);
}
for kw in &arguments.keywords {
ArgOrKeyword::Keyword(keyword) => {
self.p_delim(&mut first, ", ");
if let Some(arg) = &kw.arg {
if let Some(arg) = &keyword.arg {
self.p_id(arg);
self.p("=");
self.unparse_expr(&kw.value, precedence::COMMA);
self.unparse_expr(&keyword.value, precedence::COMMA);
} else {
self.p("**");
self.unparse_expr(&kw.value, precedence::MAX);
self.unparse_expr(&keyword.value, precedence::MAX);
}
}
}
}
}
@ -1649,6 +1659,11 @@ class Foo:
assert_round_trip!(r#"type Foo[*Ts] = ..."#);
assert_round_trip!(r#"type Foo[**P] = ..."#);
assert_round_trip!(r#"type Foo[T, U, *Ts, **P] = ..."#);
// https://github.com/astral-sh/ruff/issues/6498
assert_round_trip!(r#"f(a=1, *args, **kwargs)"#);
assert_round_trip!(r#"f(*args, a=1, **kwargs)"#);
assert_round_trip!(r#"f(*args, a=1, *args2, **kwargs)"#);
assert_round_trip!("class A(*args, a=2, *args2, **kwargs):\n pass");
}
#[test]