ruff/crates/ruff_python_ast/src/all.rs
Dhruv Manilawala 230c9ce236
Split Constant to individual literal nodes (#8064)
## Summary

This PR splits the `Constant` enum as individual literal nodes. It
introduces the following new nodes for each variant:
* `ExprStringLiteral`
* `ExprBytesLiteral`
* `ExprNumberLiteral`
* `ExprBooleanLiteral`
* `ExprNoneLiteral`
* `ExprEllipsisLiteral`

The main motivation behind this refactor is to introduce the new AST
node for implicit string concatenation in the coming PR. The elements of
that node will be either a string literal, bytes literal or a f-string
which can be implemented using an enum. This means that a string or
bytes literal cannot be represented by `Constant::Str` /
`Constant::Bytes` which creates an inconsistency.

This PR avoids that inconsistency by splitting the constant nodes into
it's own literal nodes, literal being the more appropriate naming
convention from a static analysis tool perspective.

This also makes working with literals in the linter and formatter much
more ergonomic like, for example, if one would want to check if this is
a string literal, it can be done easily using
`Expr::is_string_literal_expr` or matching against `Expr::StringLiteral`
as oppose to matching against the `ExprConstant` and enum `Constant`. A
few AST helper methods can be simplified as well which will be done in a
follow-up PR.

This introduces a new `Expr::is_literal_expr` method which is the same
as `Expr::is_constant_expr`. There are also intermediary changes related
to implicit string concatenation which are quiet less. This is done so
as to avoid having a huge PR which this already is.

## Test Plan

1. Verify and update all of the existing snapshots (parser, visitor)
2. Verify that the ecosystem check output remains **unchanged** for both
the linter and formatter

### Formatter ecosystem check

#### `main`

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |

#### `dhruv/constant-to-literal`

| project | similarity index | total files | changed files |

|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |
2023-10-30 12:13:23 +05:30

144 lines
5.9 KiB
Rust

use crate::helpers::map_subscript;
use crate::{self as ast, Expr, Stmt};
use bitflags::bitflags;
bitflags! {
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct DunderAllFlags: u8 {
/// The right-hand-side assignment to __all__ uses an invalid expression (i.e., an
/// expression that doesn't evaluate to a container type, like `__all__ = 1`).
const INVALID_FORMAT = 1 << 0;
/// The right-hand-side assignment to __all__ contains an invalid member (i.e., a
/// non-string, like `__all__ = [1]`).
const INVALID_OBJECT = 1 << 1;
}
}
/// Extract the names bound to a given __all__ assignment.
///
/// Accepts a closure that determines whether a given name (e.g., `"list"`) is a Python builtin.
pub fn extract_all_names<F>(stmt: &Stmt, is_builtin: F) -> (Vec<&str>, DunderAllFlags)
where
F: Fn(&str) -> bool,
{
fn add_to_names<'a>(elts: &'a [Expr], names: &mut Vec<&'a str>, flags: &mut DunderAllFlags) {
for elt in elts {
if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = elt {
names.push(value);
} else {
*flags |= DunderAllFlags::INVALID_OBJECT;
}
}
}
fn extract_elts<F>(expr: &Expr, is_builtin: F) -> (Option<&[Expr]>, DunderAllFlags)
where
F: Fn(&str) -> bool,
{
match expr {
Expr::List(ast::ExprList { elts, .. }) => {
return (Some(elts), DunderAllFlags::empty());
}
Expr::Tuple(ast::ExprTuple { elts, .. }) => {
return (Some(elts), DunderAllFlags::empty());
}
Expr::ListComp(_) => {
// Allow comprehensions, even though we can't statically analyze them.
return (None, DunderAllFlags::empty());
}
Expr::Name(ast::ExprName { id, .. }) => {
// Ex) `__all__ = __all__ + multiprocessing.__all__`
if id == "__all__" {
return (None, DunderAllFlags::empty());
}
}
Expr::Attribute(ast::ExprAttribute { attr, .. }) => {
// Ex) `__all__ = __all__ + multiprocessing.__all__`
if attr == "__all__" {
return (None, DunderAllFlags::empty());
}
}
Expr::Call(ast::ExprCall {
func,
arguments: ast::Arguments { args, keywords, .. },
..
}) => {
// Allow `tuple()`, `list()`, and their generic forms, like `list[int]()`.
if keywords.is_empty() && args.len() <= 1 {
if let Expr::Name(ast::ExprName { id, .. }) = map_subscript(func) {
let id = id.as_str();
if matches!(id, "tuple" | "list") && is_builtin(id) {
let [arg] = args.as_slice() else {
return (None, DunderAllFlags::empty());
};
match arg {
Expr::List(ast::ExprList { elts, .. })
| Expr::Set(ast::ExprSet { elts, .. })
| Expr::Tuple(ast::ExprTuple { elts, .. }) => {
return (Some(elts), DunderAllFlags::empty());
}
_ => {
// We can't analyze other expressions, but they must be
// valid, since the `list` or `tuple` call will ultimately
// evaluate to a list or tuple.
return (None, DunderAllFlags::empty());
}
}
}
}
}
}
Expr::NamedExpr(ast::ExprNamedExpr { value, .. }) => {
// Allow, e.g., `__all__ += (value := ["A", "B"])`.
return extract_elts(value, is_builtin);
}
_ => {}
}
(None, DunderAllFlags::INVALID_FORMAT)
}
let mut names: Vec<&str> = vec![];
let mut flags = DunderAllFlags::empty();
if let Some(value) = match stmt {
Stmt::Assign(ast::StmtAssign { value, .. }) => Some(value),
Stmt::AnnAssign(ast::StmtAnnAssign { value, .. }) => value.as_ref(),
Stmt::AugAssign(ast::StmtAugAssign { value, .. }) => Some(value),
_ => None,
} {
if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = value.as_ref() {
let mut current_left = left;
let mut current_right = right;
loop {
// Process the right side, which should be a "real" value.
let (elts, new_flags) = extract_elts(current_right, |expr| is_builtin(expr));
flags |= new_flags;
if let Some(elts) = elts {
add_to_names(elts, &mut names, &mut flags);
}
// Process the left side, which can be a "real" value or the "rest" of the
// binary operation.
if let Expr::BinOp(ast::ExprBinOp { left, right, .. }) = current_left.as_ref() {
current_left = left;
current_right = right;
} else {
let (elts, new_flags) = extract_elts(current_left, |expr| is_builtin(expr));
flags |= new_flags;
if let Some(elts) = elts {
add_to_names(elts, &mut names, &mut flags);
}
break;
}
}
} else {
let (elts, new_flags) = extract_elts(value, |expr| is_builtin(expr));
flags |= new_flags;
if let Some(elts) = elts {
add_to_names(elts, &mut names, &mut flags);
}
}
}
(names, flags)
}