Split Constant to individual literal nodes (#8064)

## Summary

This PR splits the `Constant` enum as individual literal nodes. It
introduces the following new nodes for each variant:
* `ExprStringLiteral`
* `ExprBytesLiteral`
* `ExprNumberLiteral`
* `ExprBooleanLiteral`
* `ExprNoneLiteral`
* `ExprEllipsisLiteral`

The main motivation behind this refactor is to introduce the new AST
node for implicit string concatenation in the coming PR. The elements of
that node will be either a string literal, a bytes literal, or an f-string,
which can be implemented using an enum. This means that a string or
bytes literal cannot be represented by `Constant::Str` /
`Constant::Bytes`, which creates an inconsistency.

This PR avoids that inconsistency by splitting the constant nodes into
their own literal nodes, "literal" being the more appropriate naming
convention from a static analysis tool perspective.

This also makes working with literals in the linter and formatter much
more ergonomic. For example, to check whether an expression is
a string literal, one can simply use
`Expr::is_string_literal_expr` or match against `Expr::StringLiteral`,
as opposed to matching against `ExprConstant` and the `Constant` enum. A
few AST helper methods can be simplified as well, which will be done in a
follow-up PR.

This introduces a new `Expr::is_literal_expr` method which is the same
as `Expr::is_constant_expr`. There are also intermediary changes related
to implicit string concatenation, which are quite few. This is done
to avoid making this already huge PR even larger.

## Test Plan

1. Verify and update all of the existing snapshots (parser, visitor)
2. Verify that the ecosystem check output remains **unchanged** for both
the linter and formatter

### Formatter ecosystem check

#### `main`

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |

#### `dhruv/constant-to-literal`

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |
This commit is contained in:
Dhruv Manilawala 2023-10-30 12:13:23 +05:30 committed by GitHub
parent 78bbf6d403
commit 230c9ce236
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
268 changed files with 6663 additions and 6741 deletions

View file

@ -591,8 +591,18 @@ pub enum Expr {
FormattedValue(ExprFormattedValue),
#[is(name = "f_string_expr")]
FString(ExprFString),
#[is(name = "constant_expr")]
Constant(ExprConstant),
#[is(name = "string_literal_expr")]
StringLiteral(ExprStringLiteral),
#[is(name = "bytes_literal_expr")]
BytesLiteral(ExprBytesLiteral),
#[is(name = "number_literal_expr")]
NumberLiteral(ExprNumberLiteral),
#[is(name = "boolean_literal_expr")]
BooleanLiteral(ExprBooleanLiteral),
#[is(name = "none_literal_expr")]
NoneLiteral(ExprNoneLiteral),
#[is(name = "ellipsis_literal_expr")]
EllipsisLiteral(ExprEllipsisLiteral),
#[is(name = "attribute_expr")]
Attribute(ExprAttribute),
#[is(name = "subscript_expr")]
@ -613,6 +623,42 @@ pub enum Expr {
IpyEscapeCommand(ExprIpyEscapeCommand),
}
impl Expr {
    /// Reports whether this expression is one of the literal node kinds.
    ///
    /// The literal kinds are string, bytes, number (integer, float, or complex),
    /// boolean, `None`, and ellipsis (`...`) literals.
    pub fn is_literal_expr(&self) -> bool {
        self.is_string_literal_expr()
            || self.is_bytes_literal_expr()
            || self.is_number_literal_expr()
            || self.is_boolean_literal_expr()
            || self.is_none_literal_expr()
            || self.is_ellipsis_literal_expr()
    }

    /// Reports the `implicit_concatenated` flag of a string literal, bytes
    /// literal, or f-string expression.
    ///
    /// Every other kind of expression yields `false`.
    pub fn is_implicit_concatenated_string(&self) -> bool {
        match self {
            Expr::StringLiteral(string) => string.implicit_concatenated,
            Expr::BytesLiteral(bytes) => bytes.implicit_concatenated,
            Expr::FString(fstring) => fstring.implicit_concatenated,
            _ => false,
        }
    }
}
/// An AST node used to represent a IPython escape command at the expression level.
///
/// For example,
@ -941,16 +987,127 @@ impl From<ExprFString> for Expr {
}
}
/// See also [Constant](https://docs.python.org/3/library/ast.html#ast.Constant)
#[derive(Clone, Debug, PartialEq)]
pub struct ExprConstant {
pub struct ExprStringLiteral {
pub range: TextRange,
pub value: Constant,
pub value: String,
pub unicode: bool,
pub implicit_concatenated: bool,
}
impl From<ExprConstant> for Expr {
fn from(payload: ExprConstant) -> Self {
Expr::Constant(payload)
impl From<ExprStringLiteral> for Expr {
fn from(payload: ExprStringLiteral) -> Self {
Expr::StringLiteral(payload)
}
}
/// Exposes the `range` stored on the string literal node.
impl Ranged for ExprStringLiteral {
fn range(&self) -> TextRange {
self.range
}
}
/// Lets a string literal node be borrowed as a `str` by dereferencing to its
/// `value` field.
impl Deref for ExprStringLiteral {
type Target = str;
fn deref(&self) -> &Self::Target {
self.value.as_str()
}
}
/// AST node for a bytes literal expression.
#[derive(Clone, Debug, PartialEq)]
pub struct ExprBytesLiteral {
/// The range of the node, as returned by `Ranged::range`.
pub range: TextRange,
/// The bytes value as resolved by the parser (i.e., without quotes, or escape sequences, or
/// implicit concatenations).
pub value: Vec<u8>,
/// Whether the literal contains multiple tokens that were implicitly concatenated.
pub implicit_concatenated: bool,
}
/// Wraps the node in the `Expr::BytesLiteral` variant.
impl From<ExprBytesLiteral> for Expr {
fn from(payload: ExprBytesLiteral) -> Self {
Expr::BytesLiteral(payload)
}
}
/// Exposes the `range` stored on the bytes literal node.
impl Ranged for ExprBytesLiteral {
fn range(&self) -> TextRange {
self.range
}
}
/// AST node for a number literal expression.
#[derive(Clone, Debug, PartialEq)]
pub struct ExprNumberLiteral {
/// The range of the node, as returned by `Ranged::range`.
pub range: TextRange,
/// The numeric value, which is an integer, a float, or a complex number (see `Number`).
pub value: Number,
}
/// Wraps the node in the `Expr::NumberLiteral` variant.
impl From<ExprNumberLiteral> for Expr {
fn from(payload: ExprNumberLiteral) -> Self {
Expr::NumberLiteral(payload)
}
}
/// Exposes the `range` stored on the number literal node.
impl Ranged for ExprNumberLiteral {
fn range(&self) -> TextRange {
self.range
}
}
/// The value carried by an `ExprNumberLiteral`.
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum Number {
/// An integer value.
Int(int::Int),
/// A floating-point value.
Float(f64),
/// A complex value, stored as separate real and imaginary parts.
Complex { real: f64, imag: f64 },
}
/// AST node for a boolean literal expression.
#[derive(Clone, Debug, PartialEq)]
pub struct ExprBooleanLiteral {
/// The range of the node, as returned by `Ranged::range`.
pub range: TextRange,
/// The boolean value of the literal.
pub value: bool,
}
/// Wraps the node in the `Expr::BooleanLiteral` variant.
impl From<ExprBooleanLiteral> for Expr {
fn from(payload: ExprBooleanLiteral) -> Self {
Expr::BooleanLiteral(payload)
}
}
/// Exposes the `range` stored on the boolean literal node.
impl Ranged for ExprBooleanLiteral {
fn range(&self) -> TextRange {
self.range
}
}
/// AST node for a `None` literal expression.
///
/// The node carries no value; only its range is stored.
#[derive(Clone, Debug, PartialEq)]
pub struct ExprNoneLiteral {
/// The range of the node, as returned by `Ranged::range`.
pub range: TextRange,
}
/// Wraps the node in the `Expr::NoneLiteral` variant.
impl From<ExprNoneLiteral> for Expr {
fn from(payload: ExprNoneLiteral) -> Self {
Expr::NoneLiteral(payload)
}
}
/// Exposes the `range` stored on the `None` literal node.
impl Ranged for ExprNoneLiteral {
fn range(&self) -> TextRange {
self.range
}
}
/// AST node for an ellipsis (`...`) literal expression.
///
/// The node carries no value; only its range is stored.
#[derive(Clone, Debug, PartialEq)]
pub struct ExprEllipsisLiteral {
/// The range of the node, as returned by `Ranged::range`.
pub range: TextRange,
}
/// Wraps the node in the `Expr::EllipsisLiteral` variant.
impl From<ExprEllipsisLiteral> for Expr {
fn from(payload: ExprEllipsisLiteral) -> Self {
Expr::EllipsisLiteral(payload)
}
}
/// Exposes the `range` stored on the ellipsis literal node.
impl Ranged for ExprEllipsisLiteral {
fn range(&self) -> TextRange {
self.range
}
}
@ -2595,142 +2752,6 @@ impl From<bool> for Singleton {
}
}
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum Constant {
None,
Bool(bool),
Str(StringConstant),
Bytes(BytesConstant),
Int(int::Int),
Float(f64),
Complex { real: f64, imag: f64 },
Ellipsis,
}
impl Constant {
/// Returns `true` if the constant is a string or bytes constant that contains multiple,
/// implicitly concatenated string tokens.
pub fn is_implicit_concatenated(&self) -> bool {
match self {
Constant::Str(value) => value.implicit_concatenated,
Constant::Bytes(value) => value.implicit_concatenated,
_ => false,
}
}
/// Returns `true` if the constant is a string constant that is a unicode string (i.e., `u"..."`).
pub fn is_unicode_string(&self) -> bool {
match self {
Constant::Str(value) => value.unicode,
_ => false,
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StringConstant {
/// The string value as resolved by the parser (i.e., without quotes, or escape sequences, or
/// implicit concatenations).
pub value: String,
/// Whether the string is a Unicode string (i.e., `u"..."`).
pub unicode: bool,
/// Whether the string contains multiple string tokens that were implicitly concatenated.
pub implicit_concatenated: bool,
}
impl Deref for StringConstant {
type Target = str;
fn deref(&self) -> &Self::Target {
self.value.as_str()
}
}
impl From<String> for StringConstant {
fn from(value: String) -> StringConstant {
Self {
value,
unicode: false,
implicit_concatenated: false,
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BytesConstant {
/// The bytes value as resolved by the parser (i.e., without quotes, or escape sequences, or
/// implicit concatenations).
pub value: Vec<u8>,
/// Whether the string contains multiple string tokens that were implicitly concatenated.
pub implicit_concatenated: bool,
}
impl Deref for BytesConstant {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.value.as_slice()
}
}
impl From<Vec<u8>> for BytesConstant {
fn from(value: Vec<u8>) -> BytesConstant {
Self {
value,
implicit_concatenated: false,
}
}
}
impl From<Vec<u8>> for Constant {
fn from(value: Vec<u8>) -> Constant {
Self::Bytes(BytesConstant {
value,
implicit_concatenated: false,
})
}
}
impl From<String> for Constant {
fn from(value: String) -> Constant {
Self::Str(StringConstant {
value,
unicode: false,
implicit_concatenated: false,
})
}
}
impl From<bool> for Constant {
fn from(value: bool) -> Constant {
Self::Bool(value)
}
}
#[cfg(feature = "rustpython-literal")]
impl std::fmt::Display for Constant {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Constant::None => f.pad("None"),
Constant::Bool(b) => f.pad(if *b { "True" } else { "False" }),
Constant::Str(s) => rustpython_literal::escape::UnicodeEscape::new_repr(s.as_str())
.str_repr()
.write(f),
Constant::Bytes(b) => {
let escape = rustpython_literal::escape::AsciiEscape::new_repr(b);
let repr = escape.bytes_repr().to_string().unwrap();
f.pad(&repr)
}
Constant::Int(i) => std::fmt::Display::fmt(&i, f),
Constant::Float(fp) => f.pad(&rustpython_literal::float::to_string(*fp)),
Constant::Complex { real, imag } => {
if *real == 0.0 {
write!(f, "{imag}j")
} else {
write!(f, "({real}{imag:+}j)")
}
}
Constant::Ellipsis => f.pad("..."),
}
}
}
impl Ranged for crate::nodes::ModModule {
fn range(&self) -> TextRange {
self.range
@ -3007,11 +3028,6 @@ impl Ranged for crate::nodes::ExprFString {
self.range
}
}
impl Ranged for crate::nodes::ExprConstant {
fn range(&self) -> TextRange {
self.range
}
}
impl Ranged for crate::nodes::ExprAttribute {
fn range(&self) -> TextRange {
self.range
@ -3074,7 +3090,12 @@ impl Ranged for crate::Expr {
Self::Call(node) => node.range(),
Self::FormattedValue(node) => node.range(),
Self::FString(node) => node.range(),
Self::Constant(node) => node.range(),
Expr::StringLiteral(node) => node.range(),
Expr::BytesLiteral(node) => node.range(),
Expr::NumberLiteral(node) => node.range(),
Expr::BooleanLiteral(node) => node.range(),
Expr::NoneLiteral(node) => node.range(),
Expr::EllipsisLiteral(node) => node.range(),
Self::Attribute(node) => node.range(),
Self::Subscript(node) => node.range(),
Self::Starred(node) => node.range(),
@ -3375,9 +3396,34 @@ impl From<ExprFString> for ParenthesizedExpr {
Expr::FString(payload).into()
}
}
impl From<ExprConstant> for ParenthesizedExpr {
fn from(payload: ExprConstant) -> Self {
Expr::Constant(payload).into()
impl From<ExprStringLiteral> for ParenthesizedExpr {
fn from(payload: ExprStringLiteral) -> Self {
Expr::StringLiteral(payload).into()
}
}
/// Wraps the node in `Expr::BytesLiteral`, then converts that expression with `Into`.
impl From<ExprBytesLiteral> for ParenthesizedExpr {
fn from(payload: ExprBytesLiteral) -> Self {
Expr::BytesLiteral(payload).into()
}
}
/// Wraps the node in `Expr::NumberLiteral`, then converts that expression with `Into`.
impl From<ExprNumberLiteral> for ParenthesizedExpr {
fn from(payload: ExprNumberLiteral) -> Self {
Expr::NumberLiteral(payload).into()
}
}
/// Wraps the node in `Expr::BooleanLiteral`, then converts that expression with `Into`.
impl From<ExprBooleanLiteral> for ParenthesizedExpr {
fn from(payload: ExprBooleanLiteral) -> Self {
Expr::BooleanLiteral(payload).into()
}
}
/// Wraps the node in `Expr::NoneLiteral`, then converts that expression with `Into`.
impl From<ExprNoneLiteral> for ParenthesizedExpr {
fn from(payload: ExprNoneLiteral) -> Self {
Expr::NoneLiteral(payload).into()
}
}
/// Wraps the node in `Expr::EllipsisLiteral`, then converts that expression with `Into`.
impl From<ExprEllipsisLiteral> for ParenthesizedExpr {
fn from(payload: ExprEllipsisLiteral) -> Self {
Expr::EllipsisLiteral(payload).into()
}
}
impl From<ExprAttribute> for ParenthesizedExpr {
@ -3428,7 +3474,6 @@ mod size_assertions {
assert_eq_size!(StmtClassDef, [u8; 104]);
assert_eq_size!(StmtTry, [u8; 112]);
assert_eq_size!(Expr, [u8; 80]);
assert_eq_size!(Constant, [u8; 40]);
assert_eq_size!(Pattern, [u8; 96]);
assert_eq_size!(Mod, [u8; 32]);
}