mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-23 04:55:09 +00:00
Use shared Cursor
across crates (#5715)
## Summary We have two `Cursor` implementations. This PR moves the implementation from the formatter into `ruff_python_whitespace` (kind of a poorly-named crate now) and uses it for both use-cases.
This commit is contained in:
parent
6ce252f0ed
commit
6dbc6d2e59
6 changed files with 163 additions and 210 deletions
|
@ -1,9 +1,8 @@
|
|||
use std::str::Chars;
|
||||
|
||||
use ruff_python_whitespace::is_python_whitespace;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use unic_ucd_ident::{is_xid_continue, is_xid_start};
|
||||
|
||||
use ruff_python_whitespace::{is_python_whitespace, Cursor};
|
||||
|
||||
/// Searches for the first non-trivia character in `range`.
|
||||
///
|
||||
/// The search skips over any whitespace and comments.
|
||||
|
@ -402,9 +401,7 @@ impl<'a> SimpleTokenizer<'a> {
|
|||
|
||||
// Skip the test whether there's a preceding comment if it has been performed before.
|
||||
if !self.back_line_has_no_comment {
|
||||
let rest = self.cursor.chars.as_str();
|
||||
|
||||
for (back_index, c) in rest.chars().rev().enumerate() {
|
||||
for (back_index, c) in self.cursor.chars().rev().enumerate() {
|
||||
match c {
|
||||
'#' => {
|
||||
// Potentially a comment
|
||||
|
@ -515,100 +512,6 @@ impl DoubleEndedIterator for SimpleTokenizer<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Sentinel character that `Cursor::first` and `Cursor::last` return when there is no
/// character left to peek at.
const EOF_CHAR: char = '\0';
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct Cursor<'a> {
|
||||
chars: Chars<'a>,
|
||||
source_length: TextSize,
|
||||
}
|
||||
|
||||
impl<'a> Cursor<'a> {
|
||||
fn new(source: &'a str) -> Self {
|
||||
Self {
|
||||
source_length: source.text_len(),
|
||||
chars: source.chars(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Peeks the next character from the input stream without consuming it.
|
||||
/// Returns [`EOF_CHAR`] if the file is at the end of the file.
|
||||
fn first(&self) -> char {
|
||||
self.chars.clone().next().unwrap_or(EOF_CHAR)
|
||||
}
|
||||
|
||||
/// Peeks the next character from the input stream without consuming it.
|
||||
/// Returns [`EOF_CHAR`] if the file is at the end of the file.
|
||||
fn last(&self) -> char {
|
||||
self.chars.clone().next_back().unwrap_or(EOF_CHAR)
|
||||
}
|
||||
|
||||
// SAFETY: THe `source.text_len` call in `new` would panic if the string length is larger than a `u32`.
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
fn text_len(&self) -> TextSize {
|
||||
TextSize::new(self.chars.as_str().len() as u32)
|
||||
}
|
||||
|
||||
fn token_len(&self) -> TextSize {
|
||||
self.source_length - self.text_len()
|
||||
}
|
||||
|
||||
fn start_token(&mut self) {
|
||||
self.source_length = self.text_len();
|
||||
}
|
||||
|
||||
/// Returns `true` if the file is at the end of the file.
|
||||
fn is_eof(&self) -> bool {
|
||||
self.chars.as_str().is_empty()
|
||||
}
|
||||
|
||||
/// Consumes the next character
|
||||
fn bump(&mut self) -> Option<char> {
|
||||
self.chars.next()
|
||||
}
|
||||
|
||||
/// Consumes the next character from the back
|
||||
fn bump_back(&mut self) -> Option<char> {
|
||||
self.chars.next_back()
|
||||
}
|
||||
|
||||
fn eat_char(&mut self, c: char) -> bool {
|
||||
if self.first() == c {
|
||||
self.bump();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_char_back(&mut self, c: char) -> bool {
|
||||
if self.last() == c {
|
||||
self.bump_back();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Eats symbols while predicate returns true or until the end of file is reached.
|
||||
fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
|
||||
// It was tried making optimized version of this for eg. line comments, but
|
||||
// LLVM can inline all of this and compile it down to fast iteration over bytes.
|
||||
while predicate(self.first()) && !self.is_eof() {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
|
||||
/// Eats symbols from the back while predicate returns true or until the beginning of file is reached.
|
||||
fn eat_back_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
|
||||
// It was tried making optimized version of this for eg. line comments, but
|
||||
// LLVM can inline all of this and compile it down to fast iteration over bytes.
|
||||
while predicate(self.last()) && !self.is_eof() {
|
||||
self.bump_back();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use insta::assert_debug_snapshot;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue