Split Constant to individual literal nodes (#8064)

## Summary

This PR splits the `Constant` enum into individual literal nodes. It
introduces the following new nodes, one for each variant:
* `ExprStringLiteral`
* `ExprBytesLiteral`
* `ExprNumberLiteral`
* `ExprBooleanLiteral`
* `ExprNoneLiteral`
* `ExprEllipsisLiteral`

The main motivation behind this refactor is to introduce the new AST
node for implicit string concatenation in the coming PR. The elements of
that node will be either a string literal, a bytes literal, or an
f-string, which can be implemented using an enum. This means that a
string or bytes literal cannot be represented by `Constant::Str` /
`Constant::Bytes`, which creates an inconsistency.

This PR avoids that inconsistency by splitting the constant nodes into
their own literal nodes, "literal" being the more appropriate naming
convention from a static analysis tool perspective.

This also makes working with literals in the linter and formatter much
more ergonomic. For example, to check whether an expression is a string
literal, one can simply use `Expr::is_string_literal_expr` or match
against `Expr::StringLiteral`, as opposed to matching against
`ExprConstant` and the `Constant` enum. A few AST helper methods can be
simplified as well, which will be done in a follow-up PR.

This introduces a new `Expr::is_literal_expr` method which is the same
as `Expr::is_constant_expr`. There are also intermediary changes related
to implicit string concatenation, though these are quite few. This is
done to avoid making this PR, which is already huge, even larger.

## Test Plan

1. Verify and update all of the existing snapshots (parser, visitor)
2. Verify that the ecosystem check output remains **unchanged** for both
the linter and formatter

### Formatter ecosystem check

#### `main`

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |

#### `dhruv/constant-to-literal`

| project | similarity index | total files | changed files |
|----------------|------------------:|------------------:|------------------:|
| cpython | 0.75803 | 1799 | 1647 |
| django | 0.99983 | 2772 | 34 |
| home-assistant | 0.99953 | 10596 | 186 |
| poetry | 0.99891 | 317 | 17 |
| transformers | 0.99966 | 2657 | 330 |
| twine | 1.00000 | 33 | 0 |
| typeshed | 0.99978 | 3669 | 20 |
| warehouse | 0.99977 | 654 | 13 |
| zulip | 0.99970 | 1459 | 22 |
This commit is contained in:
Dhruv Manilawala 2023-10-30 12:13:23 +05:30 committed by GitHub
parent 78bbf6d403
commit 230c9ce236
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
268 changed files with 6663 additions and 6741 deletions

View file

@ -1,36 +1,14 @@
//! Parsing of string literals, bytes literals, and implicit string concatenation.
use ruff_python_ast::{self as ast, BytesConstant, Constant, Expr, StringConstant};
use ruff_python_ast::{self as ast, Expr};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::lexer::{LexicalError, LexicalErrorType};
use crate::token::{StringKind, Tok};
pub(crate) struct StringConstantWithRange {
value: StringConstant,
range: TextRange,
}
impl Ranged for StringConstantWithRange {
fn range(&self) -> TextRange {
self.range
}
}
pub(crate) struct BytesConstantWithRange {
value: BytesConstant,
range: TextRange,
}
impl Ranged for BytesConstantWithRange {
fn range(&self) -> TextRange {
self.range
}
}
pub(crate) enum StringType {
Str(StringConstantWithRange),
Bytes(BytesConstantWithRange),
Str(ast::ExprStringLiteral),
Bytes(ast::ExprBytesLiteral),
FString(ast::ExprFString),
}
@ -47,7 +25,7 @@ impl Ranged for StringType {
impl StringType {
fn is_unicode(&self) -> bool {
match self {
Self::Str(StringConstantWithRange { value, .. }) => value.unicode,
Self::Str(ast::ExprStringLiteral { unicode, .. }) => *unicode,
_ => false,
}
}
@ -266,8 +244,10 @@ impl<'a> StringParser<'a> {
ch => value.push(ch),
}
}
Ok(Expr::from(ast::ExprConstant {
value: value.into(),
Ok(Expr::from(ast::ExprStringLiteral {
value,
unicode: false,
implicit_concatenated: false,
range: self.range(start_location),
}))
}
@ -294,8 +274,9 @@ impl<'a> StringParser<'a> {
}
}
Ok(StringType::Bytes(BytesConstantWithRange {
value: content.chars().map(|c| c as u8).collect::<Vec<u8>>().into(),
Ok(StringType::Bytes(ast::ExprBytesLiteral {
value: content.chars().map(|c| c as u8).collect::<Vec<u8>>(),
implicit_concatenated: false,
range: self.range(start_location),
}))
}
@ -320,13 +301,10 @@ impl<'a> StringParser<'a> {
self.parse_escaped_char(&mut value)?;
}
}
Ok(StringType::Str(StringConstantWithRange {
value: StringConstant {
value,
unicode: self.kind.is_unicode(),
implicit_concatenated: false,
},
Ok(StringType::Str(ast::ExprStringLiteral {
value,
unicode: self.kind.is_unicode(),
implicit_concatenated: false,
range: self.range(start_location),
}))
}
@ -402,18 +380,13 @@ pub(crate) fn concatenate_strings(
let mut content: Vec<u8> = vec![];
for string in strings {
match string {
StringType::Bytes(BytesConstantWithRange {
value: BytesConstant { value, .. },
..
}) => content.extend(value),
StringType::Bytes(ast::ExprBytesLiteral { value, .. }) => content.extend(value),
_ => unreachable!("Unexpected non-bytes literal."),
}
}
return Ok(ast::ExprConstant {
value: Constant::Bytes(BytesConstant {
value: content,
implicit_concatenated,
}),
return Ok(ast::ExprBytesLiteral {
value: content,
implicit_concatenated,
range,
}
.into());
@ -424,19 +397,14 @@ pub(crate) fn concatenate_strings(
let is_unicode = strings.first().map_or(false, StringType::is_unicode);
for string in strings {
match string {
StringType::Str(StringConstantWithRange {
value: StringConstant { value, .. },
..
}) => content.push_str(&value),
StringType::Str(ast::ExprStringLiteral { value, .. }) => content.push_str(&value),
_ => unreachable!("Unexpected non-string literal."),
}
}
return Ok(ast::ExprConstant {
value: Constant::Str(StringConstant {
value: content,
unicode: is_unicode,
implicit_concatenated,
}),
return Ok(ast::ExprStringLiteral {
value: content,
unicode: is_unicode,
implicit_concatenated,
range,
}
.into());
@ -450,12 +418,10 @@ pub(crate) fn concatenate_strings(
let mut is_unicode = false;
let take_current = |current: &mut String, start, end, unicode| -> Expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Str(StringConstant {
value: std::mem::take(current),
unicode,
implicit_concatenated,
}),
Expr::StringLiteral(ast::ExprStringLiteral {
value: std::mem::take(current),
unicode,
implicit_concatenated,
range: TextRange::new(start, end),
})
};
@ -479,10 +445,7 @@ pub(crate) fn concatenate_strings(
deduped.push(value);
is_unicode = false;
}
Expr::Constant(ast::ExprConstant {
value: Constant::Str(StringConstant { value, unicode, .. }),
..
}) => {
Expr::StringLiteral(ast::ExprStringLiteral { value, unicode, .. }) => {
if current.is_empty() {
is_unicode |= unicode;
current_start = value_range.start();
@ -490,14 +453,13 @@ pub(crate) fn concatenate_strings(
current_end = value_range.end();
current.push_str(&value);
}
_ => unreachable!("Expected `Expr::FormattedValue` or `Expr::Constant`"),
_ => {
unreachable!("Expected `Expr::FormattedValue` or `Expr::StringLiteral`")
}
}
}
}
StringType::Str(StringConstantWithRange {
value: StringConstant { value, unicode, .. },
..
}) => {
StringType::Str(ast::ExprStringLiteral { value, unicode, .. }) => {
if current.is_empty() {
is_unicode |= unicode;
current_start = string_range.start();