mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 02:12:22 +00:00
Support type alias statements in simple statement positions (#8916)
<!--
Thank you for contributing to Ruff! To help us out with reviewing, please consider the following:

- Does this pull request include a summary of the change? (See below.)
- Does this pull request include a descriptive title?
- Does this pull request include references to any relevant issues?
-->

## Summary

Our `SoftKeywordTransformer` only respected soft keywords in compound statement positions -- for example, at the start of a logical line:

```python
type X = int
```

However, type aliases can also appear in simple statement positions, like:

```python
class Class: type X = int
```

(Note that `match` and `case` are _not_ valid keywords in such positions.)

This PR upgrades the tokenizer to track both kinds of valid positions.

Closes https://github.com/astral-sh/ruff/issues/8900.
Closes https://github.com/astral-sh/ruff/issues/8899.

## Test Plan

`cargo test`
This commit is contained in:
parent
073eddb1d9
commit
20782ab02c
4 changed files with 224 additions and 18 deletions
|
@ -822,6 +822,10 @@ type X \
|
||||||
[T] = T
|
[T] = T
|
||||||
type X[T] \
|
type X[T] \
|
||||||
= T
|
= T
|
||||||
|
|
||||||
|
# simple statements
|
||||||
|
type X = int; type X = str; type X = type
|
||||||
|
class X: type X = int
|
||||||
"#;
|
"#;
|
||||||
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
|
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
|
||||||
}
|
}
|
||||||
|
@ -859,10 +863,17 @@ type (
|
||||||
type = 1
|
type = 1
|
||||||
type = x = 1
|
type = x = 1
|
||||||
x = type = 1
|
x = type = 1
|
||||||
|
lambda x: type
|
||||||
";
|
";
|
||||||
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
|
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_invalid_type() {
|
||||||
|
assert!(parse_suite("a: type X = int", "<test>").is_err());
|
||||||
|
assert!(parse_suite("lambda: type X = int", "<test>").is_err());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn numeric_literals() {
|
fn numeric_literals() {
|
||||||
let source = r"x = 123456789
|
let source = r"x = 123456789
|
||||||
|
|
|
@ -849,4 +849,98 @@ expression: "parse_suite(source, \"<test>\").unwrap()"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
TypeAlias(
|
||||||
|
StmtTypeAlias {
|
||||||
|
range: 590..602,
|
||||||
|
name: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 595..596,
|
||||||
|
id: "X",
|
||||||
|
ctx: Store,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
type_params: None,
|
||||||
|
value: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 599..602,
|
||||||
|
id: "int",
|
||||||
|
ctx: Load,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
TypeAlias(
|
||||||
|
StmtTypeAlias {
|
||||||
|
range: 604..616,
|
||||||
|
name: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 609..610,
|
||||||
|
id: "X",
|
||||||
|
ctx: Store,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
type_params: None,
|
||||||
|
value: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 613..616,
|
||||||
|
id: "str",
|
||||||
|
ctx: Load,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
TypeAlias(
|
||||||
|
StmtTypeAlias {
|
||||||
|
range: 618..631,
|
||||||
|
name: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 623..624,
|
||||||
|
id: "X",
|
||||||
|
ctx: Store,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
type_params: None,
|
||||||
|
value: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 627..631,
|
||||||
|
id: "type",
|
||||||
|
ctx: Load,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
ClassDef(
|
||||||
|
StmtClassDef {
|
||||||
|
range: 632..653,
|
||||||
|
decorator_list: [],
|
||||||
|
name: Identifier {
|
||||||
|
id: "X",
|
||||||
|
range: 638..639,
|
||||||
|
},
|
||||||
|
type_params: None,
|
||||||
|
arguments: None,
|
||||||
|
body: [
|
||||||
|
TypeAlias(
|
||||||
|
StmtTypeAlias {
|
||||||
|
range: 641..653,
|
||||||
|
name: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 646..647,
|
||||||
|
id: "X",
|
||||||
|
ctx: Store,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
type_params: None,
|
||||||
|
value: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 650..653,
|
||||||
|
id: "int",
|
||||||
|
ctx: Load,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
|
@ -988,4 +988,44 @@ expression: "parse_suite(source, \"<test>\").unwrap()"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
Expr(
|
||||||
|
StmtExpr {
|
||||||
|
range: 652..666,
|
||||||
|
value: Lambda(
|
||||||
|
ExprLambda {
|
||||||
|
range: 652..666,
|
||||||
|
parameters: Some(
|
||||||
|
Parameters {
|
||||||
|
range: 659..660,
|
||||||
|
posonlyargs: [],
|
||||||
|
args: [
|
||||||
|
ParameterWithDefault {
|
||||||
|
range: 659..660,
|
||||||
|
parameter: Parameter {
|
||||||
|
range: 659..660,
|
||||||
|
name: Identifier {
|
||||||
|
id: "x",
|
||||||
|
range: 659..660,
|
||||||
|
},
|
||||||
|
annotation: None,
|
||||||
|
},
|
||||||
|
default: None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
vararg: None,
|
||||||
|
kwonlyargs: [],
|
||||||
|
kwarg: None,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
body: Name(
|
||||||
|
ExprName {
|
||||||
|
range: 662..666,
|
||||||
|
id: "type",
|
||||||
|
ctx: Load,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use crate::{lexer::LexResult, token::Tok, Mode};
|
|
||||||
use itertools::{Itertools, MultiPeek};
|
use itertools::{Itertools, MultiPeek};
|
||||||
|
|
||||||
|
use crate::{lexer::LexResult, token::Tok, Mode};
|
||||||
|
|
||||||
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
|
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
|
||||||
/// `case`, and `type`).
|
/// `case`, and `type`).
|
||||||
///
|
///
|
||||||
|
@ -21,7 +22,7 @@ where
|
||||||
I: Iterator<Item = LexResult>,
|
I: Iterator<Item = LexResult>,
|
||||||
{
|
{
|
||||||
underlying: MultiPeek<I>,
|
underlying: MultiPeek<I>,
|
||||||
start_of_line: bool,
|
position: Position,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I> SoftKeywordTransformer<I>
|
impl<I> SoftKeywordTransformer<I>
|
||||||
|
@ -31,7 +32,11 @@ where
|
||||||
pub fn new(lexer: I, mode: Mode) -> Self {
|
pub fn new(lexer: I, mode: Mode) -> Self {
|
||||||
Self {
|
Self {
|
||||||
underlying: lexer.multipeek(), // spell-checker:ignore multipeek
|
underlying: lexer.multipeek(), // spell-checker:ignore multipeek
|
||||||
start_of_line: !matches!(mode, Mode::Expression),
|
position: if mode == Mode::Expression {
|
||||||
|
Position::Other
|
||||||
|
} else {
|
||||||
|
Position::Statement
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -49,7 +54,6 @@ where
|
||||||
// If the token is a soft keyword e.g. `type`, `match`, or `case`, check if it's
|
// If the token is a soft keyword e.g. `type`, `match`, or `case`, check if it's
|
||||||
// used as an identifier. We assume every soft keyword use is an identifier unless
|
// used as an identifier. We assume every soft keyword use is an identifier unless
|
||||||
// a heuristic is met.
|
// a heuristic is met.
|
||||||
|
|
||||||
match tok {
|
match tok {
|
||||||
// For `match` and `case`, all of the following conditions must be met:
|
// For `match` and `case`, all of the following conditions must be met:
|
||||||
// 1. The token is at the start of a logical line.
|
// 1. The token is at the start of a logical line.
|
||||||
|
@ -57,9 +61,9 @@ where
|
||||||
// inside a parenthesized expression, list, or dictionary).
|
// inside a parenthesized expression, list, or dictionary).
|
||||||
// 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
|
// 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
|
||||||
// (This is to avoid treating `match` or `case` as identifiers when annotated with
|
// (This is to avoid treating `match` or `case` as identifiers when annotated with
|
||||||
// type hints.) type hints.)
|
// type hints.)
|
||||||
Tok::Match | Tok::Case => {
|
Tok::Match | Tok::Case => {
|
||||||
if self.start_of_line {
|
if matches!(self.position, Position::Statement) {
|
||||||
let mut nesting = 0;
|
let mut nesting = 0;
|
||||||
let mut first = true;
|
let mut first = true;
|
||||||
let mut seen_colon = false;
|
let mut seen_colon = false;
|
||||||
|
@ -93,7 +97,10 @@ where
|
||||||
// 2. The type token is immediately followed by a name token.
|
// 2. The type token is immediately followed by a name token.
|
||||||
// 3. The name token is eventually followed by an equality token.
|
// 3. The name token is eventually followed by an equality token.
|
||||||
Tok::Type => {
|
Tok::Type => {
|
||||||
if self.start_of_line {
|
if matches!(
|
||||||
|
self.position,
|
||||||
|
Position::Statement | Position::SimpleStatement
|
||||||
|
) {
|
||||||
let mut is_type_alias = false;
|
let mut is_type_alias = false;
|
||||||
if let Some(Ok((tok, _))) = self.underlying.peek() {
|
if let Some(Ok((tok, _))) = self.underlying.peek() {
|
||||||
if matches!(
|
if matches!(
|
||||||
|
@ -132,18 +139,56 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.start_of_line = next.as_ref().is_some_and(|lex_result| {
|
// Update the position, to track whether we're at the start of a logical line.
|
||||||
lex_result.as_ref().is_ok_and(|(tok, _)| {
|
if let Some(lex_result) = next.as_ref() {
|
||||||
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
|
if let Ok((tok, _)) = lex_result.as_ref() {
|
||||||
return self.start_of_line;
|
match tok {
|
||||||
|
Tok::NonLogicalNewline | Tok::Comment { .. } => {
|
||||||
|
// Nothing to do.
|
||||||
|
}
|
||||||
|
Tok::StartModule | Tok::Newline | Tok::Indent | Tok::Dedent => {
|
||||||
|
self.position = Position::Statement;
|
||||||
|
}
|
||||||
|
// If we see a semicolon, assume we're at the start of a simple statement, as in:
|
||||||
|
// ```python
|
||||||
|
// type X = int; type Y = float
|
||||||
|
// ```
|
||||||
|
Tok::Semi => {
|
||||||
|
self.position = Position::SimpleStatement;
|
||||||
|
}
|
||||||
|
// If we see a colon, and we're not in a nested context, assume we're at the
|
||||||
|
// start of a simple statement, as in:
|
||||||
|
// ```python
|
||||||
|
// class Class: type X = int
|
||||||
|
// ```
|
||||||
|
Tok::Colon if self.position == Position::Other => {
|
||||||
|
self.position = Position::SimpleStatement;
|
||||||
|
}
|
||||||
|
Tok::Lpar | Tok::Lsqb | Tok::Lbrace => {
|
||||||
|
self.position = if let Position::Nested(depth) = self.position {
|
||||||
|
Position::Nested(depth.saturating_add(1))
|
||||||
|
} else {
|
||||||
|
Position::Nested(1)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Tok::Rpar | Tok::Rsqb | Tok::Rbrace => {
|
||||||
|
self.position = if let Position::Nested(depth) = self.position {
|
||||||
|
let depth = depth.saturating_sub(1);
|
||||||
|
if depth > 0 {
|
||||||
|
Position::Nested(depth)
|
||||||
|
} else {
|
||||||
|
Position::Other
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Position::Other
|
||||||
|
};
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
self.position = Position::Other;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
matches!(
|
}
|
||||||
tok,
|
|
||||||
Tok::StartModule | Tok::Newline | Tok::Indent | Tok::Dedent
|
|
||||||
)
|
|
||||||
})
|
|
||||||
});
|
|
||||||
|
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
@ -161,3 +206,19 @@ fn soft_to_name(tok: &Tok) -> Tok {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
enum Position {
|
||||||
|
/// The lexer is at the start of a logical line, i.e., the start of a simple or compound statement.
|
||||||
|
Statement,
|
||||||
|
/// The lexer is at the start of a simple statement, e.g., a statement following a semicolon
|
||||||
|
/// or colon, as in:
|
||||||
|
/// ```python
|
||||||
|
/// class Class: type X = int
|
||||||
|
/// ```
|
||||||
|
SimpleStatement,
|
||||||
|
/// The lexer is within brackets, with the given bracket nesting depth.
|
||||||
|
Nested(u32),
|
||||||
|
/// The lexer is at some other location.
|
||||||
|
Other,
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue