perf(parser): use memchr for lexing comments (#8193)

2025-10-01 22:31:23 +00:00 · 2023-10-26 20:07:43 -05:00 · 2023-10-26 20:07:43 -05:00 · e2b5c6ac5f
commit e2b5c6ac5f
parent c36efe254e
4 changed files with 22 additions and 1 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2381,6 +2381,7 @@ dependencies = [
 "itertools 0.11.0",
 "lalrpop",
 "lalrpop-util",
 "memchr",
 "ruff_python_ast",
 "ruff_text_size",
 "rustc-hash",
--- a/crates/ruff_python_parser/Cargo.toml
+++ b/crates/ruff_python_parser/Cargo.toml
@ -22,6 +22,7 @@ bitflags = { workspace = true }
 is-macro = { workspace = true }
 itertools = { workspace = true }
 lalrpop-util = { version = "0.20.0", default-features = false }
 memchr = { workspace = true }
 unicode-ident = { workspace = true }
 unicode_names2 = { workspace = true }
 rustc-hash = { workspace = true }
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@ -407,7 +407,9 @@ impl<'source> Lexer<'source> {
        #[cfg(debug_assertions)]
        debug_assert_eq!(self.cursor.previous(), '#');
-        self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
+        let bytes = self.cursor.rest().as_bytes();
        let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len());
        self.cursor.skip_bytes(offset);
        Tok::Comment(self.token_text().to_string())
    }
--- a/crates/ruff_python_parser/src/lexer/cursor.rs
+++ b/crates/ruff_python_parser/src/lexer/cursor.rs
@ -127,4 +127,21 @@ impl<'a> Cursor<'a> {
            self.bump();
        }
    }
    /// Skips the next `count` bytes.
    ///
    /// Skipping zero bytes is a no-op: the cursor does not move and, in debug
    /// builds, the tracked previous character is left unchanged.
    ///
    /// ## Panics
    ///  - If `count` is larger than the remaining bytes in the input stream.
    ///  - If `count` indexes into a multi-byte character.
    pub(super) fn skip_bytes(&mut self, count: usize) {
        let rest = self.chars.as_str();

        #[cfg(debug_assertions)]
        {
            // Only overwrite the previous character when something was actually
            // consumed; an empty skip must not clobber it with a sentinel.
            if let Some(last) = rest[..count].chars().next_back() {
                self.prev_char = last;
            }
        }

        // Slicing panics on an out-of-range or non-boundary `count`, which is
        // the documented contract of this method.
        self.chars = rest[count..].chars();
    }
 }