Merge pull request #4519 from charliermarsh/charlie/match

Add support for match statements to parser
This commit is contained in:
Jim Fasarakis-Hilliard 2023-02-21 19:43:28 +02:00 committed by GitHub
commit c137bc9d77
8 changed files with 10810 additions and 4 deletions

View file

@ -131,4 +131,5 @@ mod string;
#[rustfmt::skip]
mod python;
mod context;
mod soft_keywords;
pub mod token;

View file

@ -14,6 +14,7 @@
use crate::lexer::{LexResult, Tok};
pub use crate::mode::Mode;
use crate::soft_keywords::SoftKeywordTransformer;
use crate::{ast, error::ParseError, lexer, python};
use ast::Location;
use itertools::Itertools;
@ -188,9 +189,8 @@ pub fn parse_tokens(
let tokenizer = iter::once(Ok(marker_token))
.chain(lxr)
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
python::TopParser::new()
.parse(tokenizer)
.parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
.map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
}
@ -462,4 +462,205 @@ except* OSError as e:
assert!(parse(&source, Mode::Module, "<embedded>").is_ok());
assert!(parse(&source, Mode::Interactive, "<embedded>").is_ok());
}
// Snapshot test: parses a program in which `match` is used as an ordinary identifier
// (multiplied, negated, called, subscripted, bound with `:=`), followed by a real
// `match` statement whose subject is itself named `match`.
// The trailing `#` notes inside the raw string are part of the Python source under test.
#[test]
fn test_match_as_identifier() {
let parse_ast = parse_program(
r#"
match *a + b, c # ((match * a) + b), c
match *(a + b), c # (match * (a + b)), c
match (*a + b, c) # match ((*(a + b)), c)
match -a * b + c # (match - (a * b)) + c
match -(a * b) + c # (match - (a * b)) + c
match (-a) * b + c # (match (-(a * b))) + c
match ().a # (match()).a
match (()).a # (match(())).a
match ((),).a # (match(())).a
match [a].b # (match[a]).b
match [a,].b # (match[(a,)]).b (not (match[a]).b)
match [(a,)].b # (match[(a,)]).b
match()[a:
b] # (match())[a: b]
if match := 1: pass
match match:
case 1: pass
case 2:
pass
"#,
"<test>",
)
// Parsing must succeed; a parse error fails the test here.
.unwrap();
// The resulting AST is compared against the stored insta snapshot.
insta::assert_debug_snapshot!(parse_ast);
}
// Snapshot test: parses a collection of `match` statements exercising many pattern
// forms (literals, or-patterns, sequences, mappings, class patterns, guards, `as`
// bindings, starred captures, and tuple subjects).
// The `# case ...` lines inside the raw string are Python comments in the source under test.
#[test]
fn test_match_complex() {
let source = r#"# Cases sampled from Lib/test/test_patma.py
# case test_patma_098
match x:
case -0j:
y = 0
# case test_patma_142
match x:
case bytes(z):
y = 0
# case test_patma_073
match x:
case 0 if 0:
y = 0
case 0 if 1:
y = 1
# case test_patma_006
match 3:
case 0 | 1 | 2 | 3:
x = True
# case test_patma_049
match x:
case [0, 1] | [1, 0]:
y = 0
# case black_check_sequence_then_mapping
match x:
case [*_]:
return "seq"
case {}:
return "map"
# case test_patma_035
match x:
case {0: [1, 2, {}]}:
y = 0
case {0: [1, 2, {}] | True} | {1: [[]]} | {0: [1, 2, {}]} | [] | "X" | {}:
y = 1
case []:
y = 2
# case test_patma_107
match x:
case 0.25 + 1.75j:
y = 0
# case test_patma_097
match x:
case -0j:
y = 0
# case test_patma_007
match 4:
case 0 | 1 | 2 | 3:
x = True
# case test_patma_154
match x:
case 0 if x:
y = 0
# case test_patma_134
match x:
case {1: 0}:
y = 0
case {0: 0}:
y = 1
case {**z}:
y = 2
# case test_patma_185
match Seq():
case [*_]:
y = 0
# case test_patma_063
match x:
case 1:
y = 0
case 1:
y = 1
# case test_patma_248
match x:
case {"foo": bar}:
y = bar
# case test_patma_019
match (0, 1, 2):
case [0, 1, *x, 2]:
y = 0
# case test_patma_052
match x:
case [0]:
y = 0
case [1, 0] if (x := x[:0]):
y = 1
case [1, 0]:
y = 2
# case test_patma_191
match w:
case [x, y, *_]:
z = 0
# case test_patma_110
match x:
case -0.25 - 1.75j:
y = 0
# case test_patma_151
match (x,):
case [y]:
z = 0
# case test_patma_114
match x:
case A.B.C.D:
y = 0
# case test_patma_232
match x:
case None:
y = 0
# case test_patma_058
match x:
case 0:
y = 0
# case test_patma_233
match x:
case False:
y = 0
# case test_patma_078
match x:
case []:
y = 0
case [""]:
y = 1
case "":
y = 2
# case test_patma_156
match x:
case z:
y = 0
# case test_patma_189
match w:
case [x, y, *rest]:
z = 0
# case test_patma_042
match x:
case (0 as z) | (1 as z) | (2 as z) if z == x % 2:
y = 0
# case test_patma_034
match x:
case {0: [1, 2, {}]}:
y = 0
case {0: [1, 2, {}] | False} | {1: [[]]} | {0: [1, 2, {}]} | [] | "X" | {}:
y = 1
case []:
y = 2
# case test_patma_123
match (0, 1, 2):
case 0, *x:
y = 0
# case test_patma_126
match (0, 1, 2):
case *x, 2,:
y = 0
# case test_patma_151
match x,:
case y,:
z = 0
# case test_patma_152
match w, x:
case y, z:
v = 0
# case test_patma_153
match w := x,:
case y as v,:
z = 0
"#;
// Parsing must succeed; the resulting AST is compared against the stored insta snapshot.
let parse_ast = parse_program(source, "<test>").unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

112
parser/src/soft_keywords.rs Normal file
View file

@ -0,0 +1,112 @@
use itertools::{Itertools, MultiPeek};
use crate::lexer::{LexResult, Tok};
pub use crate::mode::Mode;
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
/// and `case`).
///
/// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords
/// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain
/// contexts.
///
/// This adaptor rewrites the token stream to accommodate this change. In particular, it replaces
/// `match` and `case` tokens with `Tok::Name` tokens if they are used as identifiers.
///
/// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and
/// parser, as neither of them needs to be aware of soft keywords.
pub struct SoftKeywordTransformer<I>
where
I: Iterator<Item = LexResult>,
{
/// The wrapped token stream. It is multi-peekable so that the rest of the current logical
/// line can be inspected without consuming it.
pub underlying: MultiPeek<I>,
/// Whether the next token to be yielded sits at the start of a logical line.
pub start_of_line: bool,
}
impl<I> SoftKeywordTransformer<I>
where
    I: Iterator<Item = LexResult>,
{
    /// Wraps `tokenizer` in a soft-keyword transformer.
    ///
    /// The stream is considered to begin at the start of a logical line when `mode` is
    /// [`Mode::Interactive`] or [`Mode::Module`]; for any other mode it is not.
    pub fn new(tokenizer: I, mode: Mode) -> Self {
        let start_of_line = matches!(mode, Mode::Interactive | Mode::Module);
        let underlying = tokenizer.multipeek();
        Self {
            underlying,
            start_of_line,
        }
    }
}
impl<I> Iterator for SoftKeywordTransformer<I>
where
    I: Iterator<Item = LexResult>,
{
    type Item = LexResult;

    /// Yields the next token of the underlying stream, rewriting a `match` or `case` token
    /// into a `Tok::Name` token when it is used as an identifier.
    ///
    /// Lexer errors and all other tokens are passed through unchanged. Returns `None` once the
    /// underlying stream is exhausted.
    #[inline]
    fn next(&mut self) -> Option<LexResult> {
        let mut next = self.underlying.next();
        if let Some(Ok((_, tok, _))) = next.as_mut() {
            // We assume every `match` or `case` is an identifier unless all of the following
            // conditions are met:
            //   1. The token is at the start of a logical line.
            //   2. The logical line contains a top-level colon (that is, a colon that is not
            //      nested inside a parenthesized expression, list, or dictionary, and that does
            //      not terminate the parameter list of a top-level `lambda`).
            //   3. That top-level colon is not the immediate sibling of the `match` or `case`
            //      token. (This keeps annotated names such as `match: int = 1` as identifiers.)
            if matches!(tok, Tok::Match | Tok::Case) {
                let mut is_keyword = false;
                if self.start_of_line {
                    let mut nesting = 0;
                    // Number of top-level `lambda` keywords whose `:` has not been seen yet.
                    // Without this, `match = lambda: 1` would be mistaken for a statement.
                    let mut pending_lambdas = 0;
                    let mut first = true;
                    // Peeking does not consume tokens; the peek cursor is reset by the next
                    // call to `self.underlying.next()`.
                    while let Some(Ok((_, peeked, _))) = self.underlying.peek() {
                        match peeked {
                            Tok::Newline => break,
                            Tok::Lambda if nesting == 0 => pending_lambdas += 1,
                            Tok::Colon if nesting == 0 => {
                                if pending_lambdas > 0 {
                                    // This colon belongs to a lambda, not to a block header.
                                    pending_lambdas -= 1;
                                } else if !first {
                                    // Found the block colon; nothing later can change the answer.
                                    is_keyword = true;
                                    break;
                                }
                            }
                            Tok::Lpar | Tok::Lsqb | Tok::Lbrace => nesting += 1,
                            Tok::Rpar | Tok::Rsqb | Tok::Rbrace => nesting -= 1,
                            _ => {}
                        }
                        first = false;
                    }
                }
                if !is_keyword {
                    *tok = soft_to_name(tok);
                }
            }
        }

        // The token after a start marker, newline, indent, or dedent begins a logical line.
        // Errors and end-of-stream leave the flag cleared.
        self.start_of_line = matches!(
            &next,
            Some(Ok((
                _,
                Tok::StartModule
                    | Tok::StartInteractive
                    | Tok::Newline
                    | Tok::Indent
                    | Tok::Dedent,
                _
            )))
        );

        next
    }
}
/// Converts a soft-keyword token (`Tok::Match` or `Tok::Case`) into the `Tok::Name` token
/// spelling the same word.
///
/// # Panics
///
/// Panics if `tok` is any other token; callers only pass soft keywords.
#[inline]
fn soft_to_name(tok: &Tok) -> Tok {
    let keyword: &str = match tok {
        Tok::Case => "case",
        Tok::Match => "match",
        _ => unreachable!("other tokens never reach here"),
    };
    Tok::Name {
        name: String::from(keyword),
    }
}

View file

@ -184,6 +184,8 @@ pub enum Tok {
Return,
Try,
While,
Match,
Case,
With,
Yield,
@ -297,6 +299,8 @@ impl fmt::Display for Tok {
Return => f.write_str("'return'"),
Try => f.write_str("'try'"),
While => f.write_str("'while'"),
Match => f.write_str("'match'"),
Case => f.write_str("'case'"),
With => f.write_str("'with'"),
Yield => f.write_str("'yield'"),
ColonEqual => f.write_str("':='"),