From 56440ad8355f54bc1f62ab06b37e085a737a8785 Mon Sep 17 00:00:00 2001 From: konsti Date: Wed, 13 Sep 2023 10:45:46 +0200 Subject: [PATCH] Introduce `ArgOrKeyword` to keep call parameter order (#7302) ## Motivation The `ast::Arguments` for call argument are split into positional arguments (args) and keywords arguments (keywords). We currently assume that call consists of first args and then keywords, which is generally the case, but not always: ```python f(*args, a=2, *args2, **kwargs) class A(*args, a=2, *args2, **kwargs): pass ``` The consequence is accidentally reordering arguments (https://github.com/astral-sh/ruff/pull/7268). ## Summary `Arguments::args_and_keywords` returns an iterator of an `ArgOrKeyword` enum that yields args and keywords in the correct order. I've fixed the obvious `args` and `keywords` usages, but there might be some cases with wrong assumptions remaining. ## Test Plan The generator got new test cases, otherwise the stacked PR (https://github.com/astral-sh/ruff/pull/7268) which uncovered this. --- Cargo.lock | 1 + crates/ruff/src/autofix/edits.rs | 8 +-- crates/ruff_python_ast/Cargo.toml | 1 + crates/ruff_python_ast/src/helpers.rs | 4 ++ crates/ruff_python_ast/src/node.rs | 25 +++----- crates/ruff_python_ast/src/nodes.rs | 69 ++++++++++++++++++++ crates/ruff_python_ast/src/visitor.rs | 3 + crates/ruff_python_codegen/src/generator.rs | 71 +++++++++++++-------- 8 files changed, 133 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9eae7ab298..e5d698258e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2304,6 +2304,7 @@ dependencies = [ "bitflags 2.4.0", "insta", "is-macro", + "itertools", "memchr", "num-bigint", "num-traits", diff --git a/crates/ruff/src/autofix/edits.rs b/crates/ruff/src/autofix/edits.rs index 82d8e749cb..e11d8a53e6 100644 --- a/crates/ruff/src/autofix/edits.rs +++ b/crates/ruff/src/autofix/edits.rs @@ -3,7 +3,7 @@ use anyhow::{Context, Result}; use ruff_diagnostics::Edit; -use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Expr, Keyword, Stmt}; +use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Stmt}; use ruff_python_codegen::Stylist; use ruff_python_index::Indexer; use ruff_python_trivia::{ @@ -92,10 +92,8 @@ pub(crate) fn remove_argument( ) -> Result { // Partition into arguments before and after the argument to remove. let (before, after): (Vec<_>, Vec<_>) = arguments - .args - .iter() - .map(Expr::range) - .chain(arguments.keywords.iter().map(Keyword::range)) + .arguments_source_order() + .map(|arg| arg.range()) .filter(|range| argument.range() != *range) .partition(|range| range.start() < argument.start()); diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index ac33034c90..b98a00ca23 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -19,6 +19,7 @@ ruff_text_size = { path = "../ruff_text_size" } bitflags = { workspace = true } is-macro = { workspace = true } +itertools = { workspace = true } memchr = { workspace = true } num-bigint = { workspace = true } num-traits = { workspace = true } diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index fe5bba4db9..9e9ce3179c 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -207,6 +207,8 @@ pub fn any_over_expr(expr: &Expr, func: &dyn Fn(&Expr) -> bool) -> bool { range: _, }) => { any_over_expr(call_func, func) + // Note that this is the evaluation order but not necessarily the declaration order + // (e.g. for `f(*args, a=2, *args2, **kwargs)` it's not) || args.iter().any(|expr| any_over_expr(expr, func)) || keywords .iter() @@ -347,6 +349,8 @@ pub fn any_over_stmt(stmt: &Stmt, func: &dyn Fn(&Expr) -> bool) -> bool { decorator_list, .. }) => { + // Note that e.g. `class A(*args, a=2, *args2, **kwargs): pass` is a valid class + // definition arguments .as_deref() .is_some_and(|Arguments { args, keywords, .. }| { diff --git a/crates/ruff_python_ast/src/node.rs b/crates/ruff_python_ast/src/node.rs index 60e9c15789..35cfb9147c 100644 --- a/crates/ruff_python_ast/src/node.rs +++ b/crates/ruff_python_ast/src/node.rs @@ -1,9 +1,9 @@ use crate::visitor::preorder::PreorderVisitor; use crate::{ - self as ast, Alias, Arguments, Comprehension, Decorator, ExceptHandler, Expr, Keyword, - MatchCase, Mod, Parameter, ParameterWithDefault, Parameters, Pattern, PatternArguments, - PatternKeyword, Stmt, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, - TypeParams, WithItem, + self as ast, Alias, ArgOrKeyword, Arguments, Comprehension, Decorator, ExceptHandler, Expr, + Keyword, MatchCase, Mod, Parameter, ParameterWithDefault, Parameters, Pattern, + PatternArguments, PatternKeyword, Stmt, TypeParam, TypeParamParamSpec, TypeParamTypeVar, + TypeParamTypeVarTuple, TypeParams, WithItem, }; use ruff_text_size::{Ranged, TextRange}; use std::ptr::NonNull; @@ -3549,18 +3549,11 @@ impl AstNode for Arguments { where V: PreorderVisitor<'a> + ?Sized, { - let ast::Arguments { - range: _, - args, - keywords, - } = self; - - for arg in args { - visitor.visit_expr(arg); - } - - for keyword in keywords { - visitor.visit_keyword(keyword); + for arg_or_keyword in self.arguments_source_order() { + match arg_or_keyword { + ArgOrKeyword::Arg(arg) => visitor.visit_expr(arg), + ArgOrKeyword::Keyword(keyword) => visitor.visit_keyword(keyword), + } } } } diff --git a/crates/ruff_python_ast/src/nodes.rs b/crates/ruff_python_ast/src/nodes.rs index c0ad1a32bd..9c4d8b594a 100644 --- a/crates/ruff_python_ast/src/nodes.rs +++ b/crates/ruff_python_ast/src/nodes.rs @@ -1,5 +1,6 @@ #![allow(clippy::derive_partial_eq_without_eq)] +use itertools::Itertools; use std::fmt; use std::fmt::Debug; use std::ops::Deref; @@ -2177,6 +2178,34 @@ pub struct Arguments { pub keywords: Vec, } +/// An entry in the argument list of a function call. +#[derive(Clone, Debug, PartialEq)] +pub enum ArgOrKeyword<'a> { + Arg(&'a Expr), + Keyword(&'a Keyword), +} + +impl<'a> From<&'a Expr> for ArgOrKeyword<'a> { + fn from(arg: &'a Expr) -> Self { + Self::Arg(arg) + } +} + +impl<'a> From<&'a Keyword> for ArgOrKeyword<'a> { + fn from(keyword: &'a Keyword) -> Self { + Self::Keyword(keyword) + } +} + +impl Ranged for ArgOrKeyword<'_> { + fn range(&self) -> TextRange { + match self { + Self::Arg(arg) => arg.range(), + Self::Keyword(keyword) => keyword.range(), + } + } +} + impl Arguments { /// Return the number of positional and keyword arguments. pub fn len(&self) -> usize { @@ -2212,6 +2241,46 @@ impl Arguments { .map(|keyword| &keyword.value) .or_else(|| self.find_positional(position)) } + + /// Return the positional and keyword arguments in the order of declaration. + /// + /// Positional arguments are generally before keyword arguments, but star arguments are an + /// exception: + /// ```python + /// class A(*args, a=2, *args2, **kwargs): + /// pass + /// + /// f(*args, a=2, *args2, **kwargs) + /// ``` + /// where `*args` and `args2` are `args` while `a=1` and `kwargs` are `keywords`. + /// + /// If you would just chain `args` and `keywords` the call would get reordered which we don't + /// want. This function instead "merge sorts" them into the correct order. + /// + /// Note that the order of evaluation is always first `args`, then `keywords`: + /// ```python + /// def f(*args, **kwargs): + /// pass + /// + /// def g(x): + /// print(x) + /// return x + /// + /// + /// f(*g([1]), a=g(2), *g([3]), **g({"4": 5})) + /// ``` + /// Output: + /// ```text + /// [1] + /// [3] + /// 2 + /// {'4': 5} + /// ``` + pub fn arguments_source_order(&self) -> impl Iterator> { + let args = self.args.iter().map(ArgOrKeyword::Arg); + let keywords = self.keywords.iter().map(ArgOrKeyword::Keyword); + args.merge_by(keywords, |left, right| left.start() < right.start()) + } } /// An AST node used to represent a sequence of type parameters. diff --git a/crates/ruff_python_ast/src/visitor.rs b/crates/ruff_python_ast/src/visitor.rs index 7d30a76e08..3c018c3b10 100644 --- a/crates/ruff_python_ast/src/visitor.rs +++ b/crates/ruff_python_ast/src/visitor.rs @@ -573,6 +573,9 @@ pub fn walk_format_spec<'a, V: Visitor<'a> + ?Sized>(visitor: &mut V, format_spe } pub fn walk_arguments<'a, V: Visitor<'a> + ?Sized>(visitor: &mut V, arguments: &'a Arguments) { + // Note that the there might be keywords before the last arg, e.g. in + // f(*args, a=2, *args2, **kwargs)`, but we follow Python in evaluating first `args` and then + // `keywords`. See also [Arguments::arguments_as_declared`]. for arg in &arguments.args { visitor.visit_expr(arg); } diff --git a/crates/ruff_python_codegen/src/generator.rs b/crates/ruff_python_codegen/src/generator.rs index 4a40bf5720..d17cb3e75f 100644 --- a/crates/ruff_python_codegen/src/generator.rs +++ b/crates/ruff_python_codegen/src/generator.rs @@ -3,9 +3,10 @@ use std::ops::Deref; use ruff_python_ast::{ - self as ast, Alias, BoolOp, CmpOp, Comprehension, Constant, ConversionFlag, DebugText, - ExceptHandler, Expr, Identifier, MatchCase, Operator, Parameter, Parameters, Pattern, Stmt, - Suite, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem, + self as ast, Alias, ArgOrKeyword, BoolOp, CmpOp, Comprehension, Constant, ConversionFlag, + DebugText, ExceptHandler, Expr, Identifier, MatchCase, Operator, Parameter, Parameters, + Pattern, Stmt, Suite, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, + WithItem, }; use ruff_python_ast::{ParameterWithDefault, TypeParams}; use ruff_python_literal::escape::{AsciiEscape, Escape, UnicodeEscape}; @@ -265,19 +266,23 @@ impl<'a> Generator<'a> { if let Some(arguments) = arguments { self.p("("); let mut first = true; - for base in &arguments.args { - self.p_delim(&mut first, ", "); - self.unparse_expr(base, precedence::MAX); - } - for keyword in &arguments.keywords { - self.p_delim(&mut first, ", "); - if let Some(arg) = &keyword.arg { - self.p_id(arg); - self.p("="); - } else { - self.p("**"); + for arg_or_keyword in arguments.arguments_source_order() { + match arg_or_keyword { + ArgOrKeyword::Arg(arg) => { + self.p_delim(&mut first, ", "); + self.unparse_expr(arg, precedence::MAX); + } + ArgOrKeyword::Keyword(keyword) => { + self.p_delim(&mut first, ", "); + if let Some(arg) = &keyword.arg { + self.p_id(arg); + self.p("="); + } else { + self.p("**"); + } + self.unparse_expr(&keyword.value, precedence::MAX); + } } - self.unparse_expr(&keyword.value, precedence::MAX); } self.p(")"); } @@ -1045,19 +1050,24 @@ impl<'a> Generator<'a> { self.unparse_comp(generators); } else { let mut first = true; - for arg in &arguments.args { - self.p_delim(&mut first, ", "); - self.unparse_expr(arg, precedence::COMMA); - } - for kw in &arguments.keywords { - self.p_delim(&mut first, ", "); - if let Some(arg) = &kw.arg { - self.p_id(arg); - self.p("="); - self.unparse_expr(&kw.value, precedence::COMMA); - } else { - self.p("**"); - self.unparse_expr(&kw.value, precedence::MAX); + + for arg_or_keyword in arguments.arguments_source_order() { + match arg_or_keyword { + ArgOrKeyword::Arg(arg) => { + self.p_delim(&mut first, ", "); + self.unparse_expr(arg, precedence::COMMA); + } + ArgOrKeyword::Keyword(keyword) => { + self.p_delim(&mut first, ", "); + if let Some(arg) = &keyword.arg { + self.p_id(arg); + self.p("="); + self.unparse_expr(&keyword.value, precedence::COMMA); + } else { + self.p("**"); + self.unparse_expr(&keyword.value, precedence::MAX); + } + } } } } @@ -1649,6 +1659,11 @@ class Foo: assert_round_trip!(r#"type Foo[*Ts] = ..."#); assert_round_trip!(r#"type Foo[**P] = ..."#); assert_round_trip!(r#"type Foo[T, U, *Ts, **P] = ..."#); + // https://github.com/astral-sh/ruff/issues/6498 + assert_round_trip!(r#"f(a=1, *args, **kwargs)"#); + assert_round_trip!(r#"f(*args, a=1, **kwargs)"#); + assert_round_trip!(r#"f(*args, a=1, *args2, **kwargs)"#); + assert_round_trip!("class A(*args, a=2, *args2, **kwargs):\n pass"); } #[test]