mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-08 21:55:26 +00:00
Use multipeek
parent 8649bf6f8f
commit f1f31324d0
2 changed files with 94 additions and 85 deletions
@@ -14,11 +14,11 @@
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;
+use crate::soft_keywords::SoftKeywordTransformer;
 use crate::{ast, error::ParseError, lexer, python};
 use ast::Location;
 use itertools::Itertools;
 use std::iter;
-use crate::soft_keywords::soft_keywords;
 
 /// Parse a full Python program usually consisting of multiple lines.
 ///
@@ -190,7 +190,7 @@ pub fn parse_tokens(
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
 
     python::TopParser::new()
-        .parse(soft_keywords(tokenizer, mode).into_iter())
+        .parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
         .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
 }
@@ -1,20 +1,10 @@
+use itertools::{Itertools, MultiPeek};
+
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;
 
-/// Collect all tokens from a token stream in a vector.
-fn collect_tokens(tokenizer: impl IntoIterator<Item = LexResult>) -> Vec<LexResult> {
-    let mut tokens: Vec<LexResult> = vec![];
-    for tok in tokenizer {
-        let is_err = tok.is_err();
-        tokens.push(tok);
-        if is_err {
-            break;
-        }
-    }
-    tokens
-}
-
-/// Modify a token stream to accommodate soft keywords (namely, `match` and `case`).
+/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
+/// and `case`).
 ///
 /// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords
 /// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain
@@ -25,93 +15,112 @@ fn collect_tokens(tokenizer: impl IntoIterator<Item = LexResult>) -> Vec<LexResult> {
 ///
 /// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and
 /// parser, as neither of them need to be aware of soft keywords.
-pub fn soft_keywords(
-    tokenizer: impl IntoIterator<Item = LexResult>,
-    mode: Mode,
-) -> Vec<LexResult> {
-    let mut tokenizer: Vec<LexResult> = collect_tokens(tokenizer);
-    let mut start_of_line = matches!(mode, Mode::Module | Mode::Interactive);
-    for i in 0..tokenizer.len() {
-        // If the token is a `match` or `case` token, check if it's used as an identifier.
-        // We assume every `match` or `case` is an identifier unless both of the following
-        // conditions are met:
-        // 1. The token is at the start of a logical line.
-        // 2. The logical line contains a top-level colon (that is, a colon that is not nested
-        //    inside a parenthesized expression, list, or dictionary).
-        // 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
-        //    (This is to avoid treating `match` and `case` as identifiers when annotated with
-        //    type hints.)
-        if tokenizer[i]
-            .as_ref()
-            .map_or(false, |(_, tok, _)| matches!(tok, Tok::Match | Tok::Case))
-        {
-            let is_identifier = {
-                if !start_of_line {
-                    // If the `match` or `case` token is not at the start of a line, it's definitely
-                    // an identifier.
-                    true
+pub struct SoftKeywordTransformer<I>
+where
+    I: Iterator<Item = LexResult>,
+{
+    pub underlying: MultiPeek<I>,
+    pub start_of_line: bool,
+}
+
+impl<I> SoftKeywordTransformer<I>
+where
+    I: Iterator<Item = LexResult>,
+{
+    pub fn new(tokenizer: I, mode: Mode) -> Self {
+        Self {
+            underlying: tokenizer.multipeek(),
+            start_of_line: matches!(mode, Mode::Interactive | Mode::Module),
+        }
+    }
+}
+
+impl<I> Iterator for SoftKeywordTransformer<I>
+where
+    I: Iterator<Item = LexResult>,
+{
+    type Item = LexResult;
+
+    #[inline]
+    fn next(&mut self) -> Option<LexResult> {
+        let mut next = self.underlying.next();
+        if let Some(Ok((start, tok, end))) = next.as_ref() {
+            // If the token is a `match` or `case` token, check if it's used as an identifier.
+            // We assume every `match` or `case` is an identifier unless both of the following
+            // conditions are met:
+            // 1. The token is at the start of a logical line.
+            // 2. The logical line contains a top-level colon (that is, a colon that is not nested
+            //    inside a parenthesized expression, list, or dictionary).
+            // 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
+            //    (This is to avoid treating `match` and `case` as identifiers when annotated with
+            //    type hints.)
+            if matches!(tok, Tok::Match | Tok::Case) {
+                if !self.start_of_line {
+                    next = Some(Ok((
+                        *start,
+                        Tok::Name {
+                            name: if matches!(tok, Tok::Match) {
+                                "match".to_string()
+                            } else {
+                                "case".to_string()
+                            },
+                        },
+                        *end,
+                    )));
                 } else {
-                    //
-                    let mut seen_colon = false;
-                    let mut first = true;
                     let mut par_count = 0;
                     let mut sqb_count = 0;
                     let mut brace_count = 0;
-                    for (_, tok, _) in tokenizer.iter().skip(i + 1).flatten() {
+                    let mut first = true;
+                    let mut seen_colon = false;
+                    while let Some(Ok((_, tok, _))) = self.underlying.peek() {
                         match tok {
                             Tok::Newline => break,
                             Tok::Colon if par_count == 0 && sqb_count == 0 && brace_count == 0 => {
                                 if !first {
                                     seen_colon = true;
                                 }
                                 break;
                             }
-                            Tok::Lpar => {
-                                par_count += 1;
-                            }
-                            Tok::Rpar => {
-                                par_count -= 1;
-                            }
-                            Tok::Lsqb => {
-                                sqb_count += 1;
-                            }
-                            Tok::Rsqb => {
-                                sqb_count -= 1;
-                            }
-                            Tok::Lbrace => {
-                                brace_count += 1;
-                            }
-                            Tok::Rbrace => {
-                                brace_count -= 1;
-                            }
+                            Tok::Lpar => par_count += 1,
+                            Tok::Rpar => par_count -= 1,
+                            Tok::Lsqb => sqb_count += 1,
+                            Tok::Rsqb => sqb_count -= 1,
+                            Tok::Lbrace => brace_count += 1,
+                            Tok::Rbrace => brace_count -= 1,
                             _ => {}
                         }
                         first = false;
                     }
-                    !seen_colon
-                }
-            };
-            if is_identifier {
-                if let Ok((_, tok, _)) = &mut tokenizer[i] {
-                    if let Tok::Match = tok {
-                        *tok = Tok::Name {
-                            name: "match".to_string(),
-                        };
-                    } else if let Tok::Case = tok {
-                        *tok = Tok::Name {
-                            name: "case".to_string(),
-                        };
+                    if !seen_colon {
+                        next = Some(Ok((
+                            *start,
+                            Tok::Name {
+                                name: if matches!(tok, Tok::Match) {
+                                    "match".to_string()
+                                } else {
+                                    "case".to_string()
+                                },
+                            },
+                            *end,
+                        )));
                     }
                 }
             }
         }
-        start_of_line = tokenizer[i].as_ref().map_or(false, |(_, tok, _)| {
-            matches!(
-                tok,
-                Tok::StartModule | Tok::StartInteractive | Tok::Newline | Tok::Indent | Tok::Dedent
-            )
-        });
-    }
-
-    tokenizer
+        self.start_of_line = next.as_ref().map_or(false, |lex_result| {
+            lex_result.as_ref().map_or(false, |(_, tok, _)| {
+                matches!(
+                    tok,
+                    Tok::StartModule
                        | Tok::StartInteractive
                        | Tok::Newline
                        | Tok::Indent
                        | Tok::Dedent
+                )
+            })
+        });
+
+        next
+    }
 }
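To make the colon-scanning heuristic in `next()` concrete, here is a self-contained sketch of the same decision over a toy token type. The `Tok` enum and `is_keyword` helper below are invented for illustration and are not part of this crate, and only parentheses are depth-tracked here, where the real code also counts square brackets and braces:

use itertools::Itertools;

enum Tok {
    Name(String),
    Colon,
    Lpar,
    Rpar,
    Other,
    Newline,
}

// A `match`/`case` that starts a logical line stays a keyword only if the
// line contains a top-level colon that is not the keyword's immediate
// sibling. `multipeek` lets us inspect the rest of the line without
// consuming it.
fn is_keyword(rest_of_line: impl Iterator<Item = Tok>) -> bool {
    let mut it = rest_of_line.multipeek();
    let mut par_count = 0;
    let mut first = true;
    let mut seen_colon = false;
    while let Some(tok) = it.peek() {
        match tok {
            Tok::Newline => break,
            Tok::Colon if par_count == 0 => {
                if !first {
                    seen_colon = true;
                }
                break;
            }
            Tok::Lpar => par_count += 1,
            Tok::Rpar => par_count -= 1,
            _ => {}
        }
        first = false;
    }
    seen_colon
}

fn main() {
    // `match x:` -- a top-level colon follows a subject, so `match` is a keyword.
    assert!(is_keyword([Tok::Name("x".into()), Tok::Colon, Tok::Newline].into_iter()));

    // `match (x):` -- parens only adjust nesting depth; still a keyword.
    assert!(is_keyword(
        [Tok::Lpar, Tok::Name("x".into()), Tok::Rpar, Tok::Colon, Tok::Newline].into_iter()
    ));

    // `match = 1` -- no colon on the line, so `match` is an identifier.
    assert!(!is_keyword([Tok::Other, Tok::Other, Tok::Newline].into_iter()));

    // `match: int = 1` -- the colon is the immediate sibling (an annotation),
    // so `match` is an identifier here as well.
    assert!(!is_keyword(
        [Tok::Colon, Tok::Name("int".into()), Tok::Other, Tok::Newline].into_iter()
    ));
}

Because the scan happens inside `Iterator::next` via `peek()`, the whole token stream no longer needs to be collected into a `Vec` up front, which is why the old `collect_tokens` helper could be deleted.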