mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-27 04:19:18 +00:00
Pull in RustPython parser (#6099)
This commit is contained in:
parent
86539c1fc5
commit
40f54375cb
779 changed files with 108400 additions and 2078 deletions
107
crates/ruff_python_parser/src/lexer/cursor.rs
Normal file
107
crates/ruff_python_parser/src/lexer/cursor.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use ruff_text_size::{TextLen, TextSize};
|
||||
use std::str::Chars;
|
||||
|
||||
/// Sentinel character that [`Cursor::first`] and [`Cursor::second`] return when there is no
/// character left to peek at.
///
/// A source can itself contain `'\0'`, which is why `eat_if` and `eat_while` additionally check
/// `Cursor::is_eof` instead of relying on this value alone.
pub(crate) const EOF_CHAR: char = '\0';
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(super) struct Cursor<'a> {
|
||||
chars: Chars<'a>,
|
||||
source_length: TextSize,
|
||||
#[cfg(debug_assertions)]
|
||||
prev_char: char,
|
||||
}
|
||||
|
||||
impl<'a> Cursor<'a> {
|
||||
pub(crate) fn new(source: &'a str) -> Self {
|
||||
Self {
|
||||
source_length: source.text_len(),
|
||||
chars: source.chars(),
|
||||
#[cfg(debug_assertions)]
|
||||
prev_char: EOF_CHAR,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the previous token. Useful for debug assertions.
|
||||
#[cfg(debug_assertions)]
|
||||
pub(super) const fn previous(&self) -> char {
|
||||
self.prev_char
|
||||
}
|
||||
|
||||
/// Peeks the next character from the input stream without consuming it.
|
||||
/// Returns [`EOF_CHAR`] if the file is at the end of the file.
|
||||
pub(super) fn first(&self) -> char {
|
||||
self.chars.clone().next().unwrap_or(EOF_CHAR)
|
||||
}
|
||||
|
||||
/// Peeks the second character from the input stream without consuming it.
|
||||
/// Returns [`EOF_CHAR`] if the position is past the end of the file.
|
||||
pub(super) fn second(&self) -> char {
|
||||
let mut chars = self.chars.clone();
|
||||
chars.next();
|
||||
chars.next().unwrap_or(EOF_CHAR)
|
||||
}
|
||||
|
||||
/// Returns the remaining text to lex.
|
||||
pub(super) fn rest(&self) -> &'a str {
|
||||
self.chars.as_str()
|
||||
}
|
||||
|
||||
// SAFETY: The `source.text_len` call in `new` would panic if the string length is larger than a `u32`.
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
pub(super) fn text_len(&self) -> TextSize {
|
||||
TextSize::new(self.chars.as_str().len() as u32)
|
||||
}
|
||||
|
||||
pub(super) fn token_len(&self) -> TextSize {
|
||||
self.source_length - self.text_len()
|
||||
}
|
||||
|
||||
pub(super) fn start_token(&mut self) {
|
||||
self.source_length = self.text_len();
|
||||
}
|
||||
|
||||
pub(super) fn is_eof(&self) -> bool {
|
||||
self.chars.as_str().is_empty()
|
||||
}
|
||||
|
||||
/// Consumes the next character
|
||||
pub(super) fn bump(&mut self) -> Option<char> {
|
||||
let prev = self.chars.next()?;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
self.prev_char = prev;
|
||||
}
|
||||
|
||||
Some(prev)
|
||||
}
|
||||
|
||||
pub(super) fn eat_char(&mut self, c: char) -> bool {
|
||||
if self.first() == c {
|
||||
self.bump();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn eat_if<F>(&mut self, mut predicate: F) -> Option<char>
|
||||
where
|
||||
F: FnMut(char) -> bool,
|
||||
{
|
||||
if predicate(self.first()) && !self.is_eof() {
|
||||
self.bump()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Eats symbols while predicate returns true or until the end of file is reached.
|
||||
pub(super) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
|
||||
// It was tried making optimized version of this for eg. line comments, but
|
||||
// LLVM can inline all of this and compile it down to fast iteration over bytes.
|
||||
while predicate(self.first()) && !self.is_eof() {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
}
|
126
crates/ruff_python_parser/src/lexer/indentation.rs
Normal file
126
crates/ruff_python_parser/src/lexer/indentation.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
use static_assertions::assert_eq_size;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// The column index of an indentation.
|
||||
///
|
||||
/// A space increments the column by one. A tab adds up to 2 (if tab size is 2) indices, but just one
|
||||
/// if the column isn't even.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Default)]
|
||||
pub(super) struct Column(u32);
|
||||
|
||||
impl Column {
|
||||
pub(super) const fn new(column: u32) -> Self {
|
||||
Self(column)
|
||||
}
|
||||
}
|
||||
|
||||
/// The number of characters in an indentation. Each character accounts for 1.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Default)]
|
||||
pub(super) struct Character(u32);
|
||||
|
||||
impl Character {
|
||||
pub(super) const fn new(characters: u32) -> Self {
|
||||
Self(characters)
|
||||
}
|
||||
}
|
||||
|
||||
/// The [Indentation](https://docs.python.org/3/reference/lexical_analysis.html#indentation) of a logical line.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)]
|
||||
pub(super) struct Indentation {
|
||||
column: Column,
|
||||
character: Character,
|
||||
}
|
||||
|
||||
impl Indentation {
|
||||
const TAB_SIZE: u32 = 2;
|
||||
|
||||
pub(super) const fn root() -> Self {
|
||||
Self {
|
||||
column: Column::new(0),
|
||||
character: Character::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) const fn new(column: Column, character: Character) -> Self {
|
||||
Self { column, character }
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub(super) fn add_space(self) -> Self {
|
||||
Self {
|
||||
character: Character(self.character.0 + 1),
|
||||
column: Column(self.column.0 + 1),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub(super) fn add_tab(self) -> Self {
|
||||
Self {
|
||||
character: Character(self.character.0 + 1),
|
||||
// Compute the column index:
|
||||
// * Adds `TAB_SIZE` if `column` is a multiple of `TAB_SIZE`
|
||||
// * Rounds `column` up to the next multiple of `TAB_SIZE` otherwise.
|
||||
// https://github.com/python/cpython/blob/2cf99026d6320f38937257da1ab014fc873a11a6/Parser/tokenizer.c#L1818
|
||||
column: Column((self.column.0 / Self::TAB_SIZE + 1) * Self::TAB_SIZE),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn try_compare(self, other: Indentation) -> Result<Ordering, UnexpectedIndentation> {
|
||||
let column_ordering = self.column.cmp(&other.column);
|
||||
let character_ordering = self.character.cmp(&other.character);
|
||||
|
||||
if column_ordering == character_ordering {
|
||||
Ok(column_ordering)
|
||||
} else {
|
||||
Err(UnexpectedIndentation)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq)]
|
||||
pub(super) struct UnexpectedIndentation;
|
||||
|
||||
// The indentations stack is used to keep track of the current indentation level
|
||||
// [See Indentation](docs.python.org/3/reference/lexical_analysis.html#indentation).
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(super) struct Indentations {
|
||||
stack: Vec<Indentation>,
|
||||
}
|
||||
|
||||
impl Indentations {
|
||||
pub(super) fn push(&mut self, indent: Indentation) {
|
||||
debug_assert_eq!(self.current().try_compare(indent), Ok(Ordering::Less));
|
||||
|
||||
self.stack.push(indent);
|
||||
}
|
||||
|
||||
pub(super) fn pop(&mut self) -> Option<Indentation> {
|
||||
self.stack.pop()
|
||||
}
|
||||
|
||||
pub(super) fn current(&self) -> &Indentation {
|
||||
static ROOT: Indentation = Indentation::root();
|
||||
self.stack.last().unwrap_or(&ROOT)
|
||||
}
|
||||
}
|
||||
|
||||
// Compile-time check that `Indentation` (two `u32` newtypes) stays exactly as large as a `u64`.
assert_eq_size!(Indentation, u64);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{Character, Column, Indentation};
    use std::cmp::Ordering;

    /// `try_compare` orders indentations whose column and character counts grow together.
    #[test]
    fn indentation_try_compare() {
        let one_tab = Indentation::new(Column::new(8), Character::new(1));
        let two_tabs = Indentation::new(Column::new(16), Character::new(2));

        assert_eq!(one_tab.try_compare(one_tab), Ok(Ordering::Equal));
        assert_eq!(two_tabs.try_compare(one_tab), Ok(Ordering::Greater));
        assert_eq!(one_tab.try_compare(two_tabs), Ok(Ordering::Less));
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue