Mirror of https://github.com/RustPython/Parser.git (synced 2025-07-10 06:35:17 +00:00)
Use multipeek
This commit is contained in:

parent 8649bf6f8f
commit f1f31324d0

2 changed files with 94 additions and 85 deletions
@@ -14,11 +14,11 @@
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;
+use crate::soft_keywords::SoftKeywordTransformer;
 use crate::{ast, error::ParseError, lexer, python};
 use ast::Location;
 use itertools::Itertools;
 use std::iter;
-use crate::soft_keywords::soft_keywords;

 /// Parse a full Python program usually consisting of multiple lines.
 ///

@@ -190,7 +190,7 @@ pub fn parse_tokens(
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
     python::TopParser::new()
-        .parse(soft_keywords(tokenizer, mode).into_iter())
+        .parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
         .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
 }
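The second changed file is the `soft_keywords` module itself, shown next. At the call site above, the integration change is a single line: the parser now pulls tokens from a lazy adapter instead of a pre-collected `Vec<LexResult>`. A rough sketch of the resulting pipeline shape, where `lex` is a hypothetical stand-in for the lexer entry point rather than a name from this commit:

    // Hypothetical wiring: any Iterator<Item = LexResult> can be wrapped.
    let tokens = lex(source); // assumed lexer entry point, yields LexResults
    // The transformer is itself an Iterator<Item = LexResult>, so the
    // parser consumes it directly, one token at a time.
    let parser_input = SoftKeywordTransformer::new(tokens, Mode::Module);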
@@ -1,20 +1,10 @@
+use itertools::{Itertools, MultiPeek};
+
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;

-/// Collect all tokens from a token stream in a vector.
-fn collect_tokens(tokenizer: impl IntoIterator<Item = LexResult>) -> Vec<LexResult> {
-    let mut tokens: Vec<LexResult> = vec![];
-    for tok in tokenizer {
-        let is_err = tok.is_err();
-        tokens.push(tok);
-        if is_err {
-            break;
-        }
-    }
-    tokens
-}
-
-/// Modify a token stream to accommodate soft keywords (namely, `match` and `case`).
+/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
+/// and `case`).
 ///
 /// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords
 /// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain
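To make the ambiguity concrete: in `match point:` the word opens a logical line that contains a top-level colon, so it must be the keyword, while in `match = 1` (no colon on the line), `match(x, y)` (any colon would be nested inside parentheses), and `match: int = 1` (the colon immediately follows the word, i.e. an annotation) it remains an ordinary identifier. These are illustrative Python snippets, not examples from the commit; the three rules they exercise are spelled out in the comment inside `next()` below.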
@@ -25,93 +15,112 @@ fn collect_tokens(tokenizer: impl IntoIterator<Item = LexResult>) -> Vec<LexResult> {
 ///
 /// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and
 /// parser, as neither of them need to be aware of soft keywords.
-pub fn soft_keywords(
-    tokenizer: impl IntoIterator<Item = LexResult>,
-    mode: Mode,
-) -> Vec<LexResult> {
-    let mut tokenizer: Vec<LexResult> = collect_tokens(tokenizer);
-    let mut start_of_line = matches!(mode, Mode::Module | Mode::Interactive);
-    for i in 0..tokenizer.len() {
-        // If the token is a `match` or `case` token, check if it's used as an identifier.
-        // We assume every `match` or `case` is an identifier unless both of the following
-        // conditions are met:
-        // 1. The token is at the start of a logical line.
-        // 2. The logical line contains a top-level colon (that is, a colon that is not nested
-        //    inside a parenthesized expression, list, or dictionary).
-        // 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
-        //    (This is to avoid treating `match` and `case` as identifiers when annotated with
-        //    type hints.)
-        if tokenizer[i]
-            .as_ref()
-            .map_or(false, |(_, tok, _)| matches!(tok, Tok::Match | Tok::Case))
-        {
-            let is_identifier = {
-                if !start_of_line {
-                    // If the `match` or `case` token is not at the start of a line, it's definitely
-                    // an identifier.
-                    true
+pub struct SoftKeywordTransformer<I>
+where
+    I: Iterator<Item = LexResult>,
+{
+    pub underlying: MultiPeek<I>,
+    pub start_of_line: bool,
+}
+
+impl<I> SoftKeywordTransformer<I>
+where
+    I: Iterator<Item = LexResult>,
+{
+    pub fn new(tokenizer: I, mode: Mode) -> Self {
+        Self {
+            underlying: tokenizer.multipeek(),
+            start_of_line: matches!(mode, Mode::Interactive | Mode::Module),
+        }
+    }
+}
+
+impl<I> Iterator for SoftKeywordTransformer<I>
+where
+    I: Iterator<Item = LexResult>,
+{
+    type Item = LexResult;
+
+    #[inline]
+    fn next(&mut self) -> Option<LexResult> {
+        let mut next = self.underlying.next();
+        if let Some(Ok((start, tok, end))) = next.as_ref() {
+            // If the token is a `match` or `case` token, check if it's used as an identifier.
+            // We assume every `match` or `case` is an identifier unless both of the following
+            // conditions are met:
+            // 1. The token is at the start of a logical line.
+            // 2. The logical line contains a top-level colon (that is, a colon that is not nested
+            //    inside a parenthesized expression, list, or dictionary).
+            // 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
+            //    (This is to avoid treating `match` and `case` as identifiers when annotated with
+            //    type hints.)
+            if matches!(tok, Tok::Match | Tok::Case) {
+                if !self.start_of_line {
+                    next = Some(Ok((
+                        *start,
+                        Tok::Name {
+                            name: if matches!(tok, Tok::Match) {
+                                "match".to_string()
+                            } else {
+                                "case".to_string()
+                            },
+                        },
+                        *end,
+                    )));
                 } else {
-                    //
-                    let mut seen_colon = false;
-                    let mut first = true;
                     let mut par_count = 0;
                     let mut sqb_count = 0;
                     let mut brace_count = 0;
-                    for (_, tok, _) in tokenizer.iter().skip(i + 1).flatten() {
+                    let mut first = true;
+                    let mut seen_colon = false;
+                    while let Some(Ok((_, tok, _))) = self.underlying.peek() {
                         match tok {
                             Tok::Newline => break,
                             Tok::Colon if par_count == 0 && sqb_count == 0 && brace_count == 0 => {
                                 if !first {
                                     seen_colon = true;
                                 }
-                                break;
-                            }
-                            Tok::Lpar => {
-                                par_count += 1;
-                            }
-                            Tok::Rpar => {
-                                par_count -= 1;
-                            }
-                            Tok::Lsqb => {
-                                sqb_count += 1;
-                            }
-                            Tok::Rsqb => {
-                                sqb_count -= 1;
-                            }
-                            Tok::Lbrace => {
-                                brace_count += 1;
-                            }
-                            Tok::Rbrace => {
-                                brace_count -= 1;
                             }
+                            Tok::Lpar => par_count += 1,
+                            Tok::Rpar => par_count -= 1,
+                            Tok::Lsqb => sqb_count += 1,
+                            Tok::Rsqb => sqb_count -= 1,
+                            Tok::Lbrace => brace_count += 1,
+                            Tok::Rbrace => brace_count -= 1,
                             _ => {}
                         }
                         first = false;
                     }
-                    !seen_colon
-                }
-            };
-            if is_identifier {
-                if let Ok((_, tok, _)) = &mut tokenizer[i] {
-                    if let Tok::Match = tok {
-                        *tok = Tok::Name {
-                            name: "match".to_string(),
-                        };
-                    } else if let Tok::Case = tok {
-                        *tok = Tok::Name {
-                            name: "case".to_string(),
-                        };
-                    }
+                    if !seen_colon {
+                        next = Some(Ok((
+                            *start,
+                            Tok::Name {
+                                name: if matches!(tok, Tok::Match) {
+                                    "match".to_string()
+                                } else {
+                                    "case".to_string()
+                                },
+                            },
+                            *end,
+                        )));
+                    }
                 }
             }
         }
-        start_of_line = tokenizer[i].as_ref().map_or(false, |(_, tok, _)| {
-            matches!(
-                tok,
-                Tok::StartModule | Tok::StartInteractive | Tok::Newline | Tok::Indent | Tok::Dedent
-            )
-        });
-    }
-    tokenizer
+        self.start_of_line = next.as_ref().map_or(false, |lex_result| {
+            lex_result.as_ref().map_or(false, |(_, tok, _)| {
+                matches!(
+                    tok,
+                    Tok::StartModule
+                        | Tok::StartInteractive
+                        | Tok::Newline
+                        | Tok::Indent
+                        | Tok::Dedent
+                )
+            })
+        });
+
+        next
+    }
 }
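The commit title refers to itertools' `MultiPeek`, which is what makes the lazy lookahead in `next()` work: each `peek()` advances a separate cursor without consuming anything, and that cursor resets on the following `next()`, so every token inspected by the `while let` scan is still handed to the parser afterwards. A standalone sketch of those semantics, using plain integers in place of tokens:

    use itertools::Itertools;

    let mut it = [10, 20, 30].into_iter().multipeek();
    assert_eq!(it.peek(), Some(&10)); // first peek sees the next element...
    assert_eq!(it.peek(), Some(&20)); // ...and each further peek looks one deeper
    assert_eq!(it.next(), Some(10));  // next() is unaffected by prior peeking
    assert_eq!(it.peek(), Some(&20)); // and it resets the peek cursor

Compared with the removed `collect_tokens`-based pass, which buffered the whole token stream up front and rewrote the `Vec` in place, the iterator version allocates no token buffer and stops its lookahead naturally at the first lexer error, since the `while let Some(Ok(..))` pattern only matches while the upcoming items are `Ok`.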