From 21f2d0c90b68b4dc9daae529e4b5e718b68e59d4 Mon Sep 17 00:00:00 2001
From: Micha Reiser
Date: Tue, 16 Jan 2024 09:23:43 +0100
Subject: [PATCH] Add an explicit fast path for whitespace to `is_identifier_continuation` (#9532)

---
 crates/ruff_python_parser/src/lexer.rs       | 9 ++++++---
 crates/ruff_python_stdlib/src/identifiers.rs | 9 ++++++---
 crates/ruff_python_trivia/src/tokenizer.rs   | 2 ++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 0bd075c3c3..517dd4b441 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -1498,9 +1498,12 @@ fn is_unicode_identifier_start(c: char) -> bool {
 // Checks if the character c is a valid continuation character as described
 // in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
 fn is_identifier_continuation(c: char) -> bool {
-    match c {
-        'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => true,
-        c => is_xid_continue(c),
+    // Arrange things such that ASCII codepoints never
+    // result in the slower `is_xid_continue` getting called.
+    if c.is_ascii() {
+        matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9')
+    } else {
+        is_xid_continue(c)
     }
 }

diff --git a/crates/ruff_python_stdlib/src/identifiers.rs b/crates/ruff_python_stdlib/src/identifiers.rs
index ab29b3877c..950b128c98 100644
--- a/crates/ruff_python_stdlib/src/identifiers.rs
+++ b/crates/ruff_python_stdlib/src/identifiers.rs
@@ -33,9 +33,12 @@ fn is_identifier_start(c: char) -> bool {
 // Checks if the character c is a valid continuation character as described
 // in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
 fn is_identifier_continuation(c: char) -> bool {
-    match c {
-        'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => true,
-        c => is_xid_continue(c),
+    // Arrange things such that ASCII codepoints never
+    // result in the slower `is_xid_continue` getting called.
+    if c.is_ascii() {
+        matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9')
+    } else {
+        is_xid_continue(c)
     }
 }

diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs
index 7865a80187..dad44dd51a 100644
--- a/crates/ruff_python_trivia/src/tokenizer.rs
+++ b/crates/ruff_python_trivia/src/tokenizer.rs
@@ -136,6 +136,8 @@ fn is_identifier_start(c: char) -> bool {
 // Checks if the character c is a valid continuation character as described
 // in https://docs.python.org/3/reference/lexical_analysis.html#identifiers
 fn is_identifier_continuation(c: char) -> bool {
+    // Arrange things such that ASCII codepoints never
+    // result in the slower `is_xid_continue` getting called.
     if c.is_ascii() {
         matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '0'..='9')
     } else {