mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-30 13:51:37 +00:00

## Summary This PR splits the `Constant` enum as individual literal nodes. It introduces the following new nodes for each variant: * `ExprStringLiteral` * `ExprBytesLiteral` * `ExprNumberLiteral` * `ExprBooleanLiteral` * `ExprNoneLiteral` * `ExprEllipsisLiteral` The main motivation behind this refactor is to introduce the new AST node for implicit string concatenation in the coming PR. The elements of that node will be either a string literal, bytes literal or a f-string which can be implemented using an enum. This means that a string or bytes literal cannot be represented by `Constant::Str` / `Constant::Bytes` which creates an inconsistency. This PR avoids that inconsistency by splitting the constant nodes into it's own literal nodes, literal being the more appropriate naming convention from a static analysis tool perspective. This also makes working with literals in the linter and formatter much more ergonomic like, for example, if one would want to check if this is a string literal, it can be done easily using `Expr::is_string_literal_expr` or matching against `Expr::StringLiteral` as oppose to matching against the `ExprConstant` and enum `Constant`. A few AST helper methods can be simplified as well which will be done in a follow-up PR. This introduces a new `Expr::is_literal_expr` method which is the same as `Expr::is_constant_expr`. There are also intermediary changes related to implicit string concatenation which are quiet less. This is done so as to avoid having a huge PR which this already is. ## Test Plan 1. Verify and update all of the existing snapshots (parser, visitor) 2. 
Verify that the ecosystem check output remains **unchanged** for both the linter and formatter ### Formatter ecosystem check #### `main` | project | similarity index | total files | changed files | |----------------|------------------:|------------------:|------------------:| | cpython | 0.75803 | 1799 | 1647 | | django | 0.99983 | 2772 | 34 | | home-assistant | 0.99953 | 10596 | 186 | | poetry | 0.99891 | 317 | 17 | | transformers | 0.99966 | 2657 | 330 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99978 | 3669 | 20 | | warehouse | 0.99977 | 654 | 13 | | zulip | 0.99970 | 1459 | 22 | #### `dhruv/constant-to-literal` | project | similarity index | total files | changed files | |----------------|------------------:|------------------:|------------------:| | cpython | 0.75803 | 1799 | 1647 | | django | 0.99983 | 2772 | 34 | | home-assistant | 0.99953 | 10596 | 186 | | poetry | 0.99891 | 317 | 17 | | transformers | 0.99966 | 2657 | 330 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99978 | 3669 | 20 | | warehouse | 0.99977 | 654 | 13 | | zulip | 0.99970 | 1459 | 22 |
104 lines
2.8 KiB
Rust
104 lines
2.8 KiB
Rust
//! Doc line extraction. In this context, a doc line is a line consisting of a
//! standalone comment or a constant string statement.
use std::iter::FusedIterator;
|
|
|
|
use ruff_python_ast::{self as ast, Stmt, Suite};
|
|
use ruff_python_parser::lexer::LexResult;
|
|
use ruff_python_parser::Tok;
|
|
use ruff_text_size::{Ranged, TextSize};
|
|
|
|
use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};
|
|
use ruff_source_file::{Locator, UniversalNewlineIterator};
|
|
|
|
/// Extract doc lines (standalone comments) from a token sequence.
///
/// Returns an iterator yielding the start offset of each comment token that is
/// the first non-trivia token on its line (see the `Iterator` impl on
/// [`DocLines`]).
pub(crate) fn doc_lines_from_tokens(lxr: &[LexResult]) -> DocLines {
    DocLines::new(lxr)
}
/// Iterator over the start offsets of standalone comments in a token stream.
pub(crate) struct DocLines<'a> {
    // Flattened view over the lex results; `flatten` yields only the `Ok`
    // token/range pairs, silently skipping lex errors.
    inner: std::iter::Flatten<core::slice::Iter<'a, LexResult>>,
    // End offset of the most recently consumed token (updated by `next`;
    // not read within this file's visible scope).
    prev: TextSize,
}
impl<'a> DocLines<'a> {
    /// Construct the iterator over a slice of lex results.
    fn new(lxr: &'a [LexResult]) -> Self {
        Self {
            // `flatten` skips `Err` entries, so only valid tokens are visited.
            inner: lxr.iter().flatten(),
            prev: TextSize::default(),
        }
    }
}
impl Iterator for DocLines<'_> {
|
|
type Item = TextSize;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
let mut at_start_of_line = true;
|
|
loop {
|
|
let (tok, range) = self.inner.next()?;
|
|
|
|
match tok {
|
|
Tok::Comment(..) => {
|
|
if at_start_of_line {
|
|
break Some(range.start());
|
|
}
|
|
}
|
|
Tok::Newline | Tok::NonLogicalNewline => {
|
|
at_start_of_line = true;
|
|
}
|
|
Tok::Indent | Tok::Dedent => {
|
|
// ignore
|
|
}
|
|
_ => {
|
|
at_start_of_line = false;
|
|
}
|
|
}
|
|
|
|
self.prev = range.end();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Once `next` returns `None` (the underlying slice-backed iterator is
// exhausted via `?`), it keeps returning `None`, so the fused contract holds.
impl FusedIterator for DocLines<'_> {}
/// AST visitor that records the start offset of every line covered by a
/// standalone string-literal expression statement (e.g. a docstring).
struct StringLinesVisitor<'a> {
    // Start offsets of the collected doc lines, in visitation order.
    string_lines: Vec<TextSize>,
    // Used to slice the source text underlying each string expression.
    locator: &'a Locator<'a>,
}
impl StatementVisitor<'_> for StringLinesVisitor<'_> {
|
|
fn visit_stmt(&mut self, stmt: &Stmt) {
|
|
if let Stmt::Expr(ast::StmtExpr {
|
|
value: expr,
|
|
range: _,
|
|
}) = stmt
|
|
{
|
|
if expr.is_string_literal_expr() {
|
|
for line in UniversalNewlineIterator::with_offset(
|
|
self.locator.slice(expr.as_ref()),
|
|
expr.start(),
|
|
) {
|
|
self.string_lines.push(line.start());
|
|
}
|
|
}
|
|
}
|
|
walk_stmt(self, stmt);
|
|
}
|
|
}
|
|
|
|
impl<'a> StringLinesVisitor<'a> {
    /// Create a visitor with an empty set of collected lines.
    fn new(locator: &'a Locator<'a>) -> Self {
        Self {
            string_lines: Vec::new(),
            locator,
        }
    }
}
/// Extract doc lines (standalone strings) start positions from an AST.
|
|
pub(crate) fn doc_lines_from_ast(python_ast: &Suite, locator: &Locator) -> Vec<TextSize> {
|
|
let mut visitor = StringLinesVisitor::new(locator);
|
|
visitor.visit_body(python_ast);
|
|
visitor.string_lines
|
|
}
|