mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-24 13:33:50 +00:00
Move token and error structs into related modules (#11957)
## Summary

This PR does some housekeeping by moving certain structs into related modules. Specifically:

1. Move `LexicalError` and `LexicalErrorType` from `lexer.rs` to `error.rs`, which also contains `ParseError`.
2. Move `Token`, `TokenFlags` and `TokenValue` from `lexer.rs` to `token.rs`.
This commit is contained in:
parent
4667d8697c
commit
96da136e6a
10 changed files with 352 additions and 342 deletions
|
@ -2,7 +2,6 @@ use std::fmt;
|
||||||
|
|
||||||
use ruff_text_size::TextRange;
|
use ruff_text_size::TextRange;
|
||||||
|
|
||||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
|
||||||
use crate::TokenKind;
|
use crate::TokenKind;
|
||||||
|
|
||||||
/// Represents errors that occur during parsing and are
|
/// Represents errors that occur during parsing and are
|
||||||
|
@ -295,3 +294,135 @@ impl std::fmt::Display for ParseErrorType {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Represents an error that occurs during lexing and is
|
||||||
|
/// returned by the `parse_*` functions in the iterator in the
|
||||||
|
/// [lexer] implementation.
|
||||||
|
///
|
||||||
|
/// [lexer]: crate::lexer
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct LexicalError {
|
||||||
|
/// The type of error that occurred.
|
||||||
|
error: LexicalErrorType,
|
||||||
|
/// The location of the error.
|
||||||
|
location: TextRange,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LexicalError {
|
||||||
|
/// Creates a new `LexicalError` with the given error type and location.
|
||||||
|
pub fn new(error: LexicalErrorType, location: TextRange) -> Self {
|
||||||
|
Self { error, location }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn error(&self) -> &LexicalErrorType {
|
||||||
|
&self.error
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_error(self) -> LexicalErrorType {
|
||||||
|
self.error
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn location(&self) -> TextRange {
|
||||||
|
self.location
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Deref for LexicalError {
|
||||||
|
type Target = LexicalErrorType;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
self.error()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for LexicalError {
|
||||||
|
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||||
|
Some(self.error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for LexicalError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{} at byte offset {}",
|
||||||
|
self.error(),
|
||||||
|
u32::from(self.location().start())
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents the different types of errors that can occur during lexing.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub enum LexicalErrorType {
|
||||||
|
// TODO: Can probably be removed, the places it is used seem to be able
|
||||||
|
// to use the `UnicodeError` variant instead.
|
||||||
|
#[doc(hidden)]
|
||||||
|
StringError,
|
||||||
|
/// A string literal without the closing quote.
|
||||||
|
UnclosedStringError,
|
||||||
|
/// Decoding of a unicode escape sequence in a string literal failed.
|
||||||
|
UnicodeError,
|
||||||
|
/// Missing the `{` for unicode escape sequence.
|
||||||
|
MissingUnicodeLbrace,
|
||||||
|
/// Missing the `}` for unicode escape sequence.
|
||||||
|
MissingUnicodeRbrace,
|
||||||
|
/// The indentation is not consistent.
|
||||||
|
IndentationError,
|
||||||
|
/// An unrecognized token was encountered.
|
||||||
|
UnrecognizedToken { tok: char },
|
||||||
|
/// An f-string error containing the [`FStringErrorType`].
|
||||||
|
FStringError(FStringErrorType),
|
||||||
|
/// Invalid character encountered in a byte literal.
|
||||||
|
InvalidByteLiteral,
|
||||||
|
/// An unexpected character was encountered after a line continuation.
|
||||||
|
LineContinuationError,
|
||||||
|
/// An unexpected end of file was encountered.
|
||||||
|
Eof,
|
||||||
|
/// An unexpected error occurred.
|
||||||
|
OtherError(Box<str>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for LexicalErrorType {}
|
||||||
|
|
||||||
|
impl std::fmt::Display for LexicalErrorType {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
|
||||||
|
LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
|
||||||
|
LexicalErrorType::InvalidByteLiteral => {
|
||||||
|
write!(f, "bytes can only contain ASCII literal characters")
|
||||||
|
}
|
||||||
|
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
|
||||||
|
LexicalErrorType::IndentationError => {
|
||||||
|
write!(f, "unindent does not match any outer indentation level")
|
||||||
|
}
|
||||||
|
LexicalErrorType::UnrecognizedToken { tok } => {
|
||||||
|
write!(f, "Got unexpected token {tok}")
|
||||||
|
}
|
||||||
|
LexicalErrorType::LineContinuationError => {
|
||||||
|
write!(f, "unexpected character after line continuation character")
|
||||||
|
}
|
||||||
|
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
|
||||||
|
LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
|
||||||
|
LexicalErrorType::UnclosedStringError => {
|
||||||
|
write!(f, "missing closing quote in string literal")
|
||||||
|
}
|
||||||
|
LexicalErrorType::MissingUnicodeLbrace => {
|
||||||
|
write!(f, "Missing `{{` in Unicode escape sequence")
|
||||||
|
}
|
||||||
|
LexicalErrorType::MissingUnicodeRbrace => {
|
||||||
|
write!(f, "Missing `}}` in Unicode escape sequence")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_pointer_width = "64")]
|
||||||
|
mod sizes {
|
||||||
|
use crate::error::{LexicalError, LexicalErrorType};
|
||||||
|
use static_assertions::assert_eq_size;
|
||||||
|
|
||||||
|
assert_eq_size!(LexicalErrorType, [u8; 24]);
|
||||||
|
assert_eq_size!(LexicalError, [u8; 32]);
|
||||||
|
}
|
||||||
|
|
|
@ -9,23 +9,19 @@
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use bitflags::bitflags;
|
|
||||||
use unicode_ident::{is_xid_continue, is_xid_start};
|
use unicode_ident::{is_xid_continue, is_xid_start};
|
||||||
use unicode_normalization::UnicodeNormalization;
|
use unicode_normalization::UnicodeNormalization;
|
||||||
|
|
||||||
use ruff_python_ast::str::Quote;
|
use ruff_python_ast::{Int, IpyEscapeKind, StringFlags};
|
||||||
use ruff_python_ast::str_prefix::{
|
|
||||||
AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
|
|
||||||
};
|
|
||||||
use ruff_python_ast::{AnyStringFlags, Int, IpyEscapeKind, StringFlags};
|
|
||||||
use ruff_python_trivia::is_python_whitespace;
|
use ruff_python_trivia::is_python_whitespace;
|
||||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::error::FStringErrorType;
|
use crate::error::{FStringErrorType, LexicalError, LexicalErrorType};
|
||||||
use crate::lexer::cursor::{Cursor, EOF_CHAR};
|
use crate::lexer::cursor::{Cursor, EOF_CHAR};
|
||||||
use crate::lexer::fstring::{FStringContext, FStrings, FStringsCheckpoint};
|
use crate::lexer::fstring::{FStringContext, FStrings, FStringsCheckpoint};
|
||||||
use crate::lexer::indentation::{Indentation, Indentations, IndentationsCheckpoint};
|
use crate::lexer::indentation::{Indentation, Indentations, IndentationsCheckpoint};
|
||||||
use crate::{Mode, TokenKind};
|
use crate::token::{TokenFlags, TokenKind, TokenValue};
|
||||||
|
use crate::Mode;
|
||||||
|
|
||||||
mod cursor;
|
mod cursor;
|
||||||
mod fstring;
|
mod fstring;
|
||||||
|
@ -1511,317 +1507,6 @@ impl<'src> Lexer<'src> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bitflags! {
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
||||||
pub(crate) struct TokenFlags: u8 {
|
|
||||||
/// The token is a string with double quotes (`"`).
|
|
||||||
const DOUBLE_QUOTES = 1 << 0;
|
|
||||||
/// The token is a triple-quoted string i.e., it starts and ends with three consecutive
|
|
||||||
/// quote characters (`"""` or `'''`).
|
|
||||||
const TRIPLE_QUOTED_STRING = 1 << 1;
|
|
||||||
|
|
||||||
/// The token is a unicode string i.e., prefixed with `u` or `U`
|
|
||||||
const UNICODE_STRING = 1 << 2;
|
|
||||||
/// The token is a byte string i.e., prefixed with `b` or `B`
|
|
||||||
const BYTE_STRING = 1 << 3;
|
|
||||||
/// The token is an f-string i.e., prefixed with `f` or `F`
|
|
||||||
const F_STRING = 1 << 4;
|
|
||||||
/// The token is a raw string and the prefix character is in lowercase.
|
|
||||||
const RAW_STRING_LOWERCASE = 1 << 5;
|
|
||||||
/// The token is a raw string and the prefix character is in uppercase.
|
|
||||||
const RAW_STRING_UPPERCASE = 1 << 6;
|
|
||||||
|
|
||||||
/// The token is a raw string i.e., prefixed with `r` or `R`
|
|
||||||
const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StringFlags for TokenFlags {
|
|
||||||
fn quote_style(self) -> Quote {
|
|
||||||
if self.intersects(TokenFlags::DOUBLE_QUOTES) {
|
|
||||||
Quote::Double
|
|
||||||
} else {
|
|
||||||
Quote::Single
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_triple_quoted(self) -> bool {
|
|
||||||
self.intersects(TokenFlags::TRIPLE_QUOTED_STRING)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn prefix(self) -> AnyStringPrefix {
|
|
||||||
if self.intersects(TokenFlags::F_STRING) {
|
|
||||||
if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
|
||||||
AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
|
|
||||||
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
|
||||||
AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
|
|
||||||
} else {
|
|
||||||
AnyStringPrefix::Format(FStringPrefix::Regular)
|
|
||||||
}
|
|
||||||
} else if self.intersects(TokenFlags::BYTE_STRING) {
|
|
||||||
if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
|
||||||
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
|
|
||||||
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
|
||||||
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
|
|
||||||
} else {
|
|
||||||
AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
|
|
||||||
}
|
|
||||||
} else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
|
||||||
AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
|
|
||||||
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
|
||||||
AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
|
|
||||||
} else if self.intersects(TokenFlags::UNICODE_STRING) {
|
|
||||||
AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
|
|
||||||
} else {
|
|
||||||
AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TokenFlags {
|
|
||||||
/// Returns `true` if the token is an f-string.
|
|
||||||
const fn is_f_string(self) -> bool {
|
|
||||||
self.intersects(TokenFlags::F_STRING)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if the token is a triple-quoted f-string.
|
|
||||||
fn is_triple_quoted_fstring(self) -> bool {
|
|
||||||
self.contains(TokenFlags::F_STRING | TokenFlags::TRIPLE_QUOTED_STRING)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if the token is a raw string.
|
|
||||||
const fn is_raw_string(self) -> bool {
|
|
||||||
self.intersects(TokenFlags::RAW_STRING)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn as_any_string_flags(self) -> AnyStringFlags {
|
|
||||||
AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
||||||
pub struct Token {
|
|
||||||
/// The kind of the token.
|
|
||||||
kind: TokenKind,
|
|
||||||
/// The range of the token.
|
|
||||||
range: TextRange,
|
|
||||||
/// The set of flags describing this token.
|
|
||||||
flags: TokenFlags,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Token {
|
|
||||||
pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
|
|
||||||
Self { kind, range, flags }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the token kind.
|
|
||||||
#[inline]
|
|
||||||
pub const fn kind(&self) -> TokenKind {
|
|
||||||
self.kind
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the token as a tuple of (kind, range).
|
|
||||||
#[inline]
|
|
||||||
pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
|
|
||||||
(self.kind, self.range)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if this is any kind of string token.
|
|
||||||
const fn is_any_string(self) -> bool {
|
|
||||||
matches!(
|
|
||||||
self.kind,
|
|
||||||
TokenKind::String
|
|
||||||
| TokenKind::FStringStart
|
|
||||||
| TokenKind::FStringMiddle
|
|
||||||
| TokenKind::FStringEnd
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if the current token is a triple-quoted string of any kind.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// If it isn't a string or any f-string tokens.
|
|
||||||
pub fn is_triple_quoted_string(self) -> bool {
|
|
||||||
assert!(self.is_any_string());
|
|
||||||
self.flags.is_triple_quoted()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the [`Quote`] style for the current string token of any kind.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// If it isn't a string or any f-string tokens.
|
|
||||||
pub fn string_quote_style(self) -> Quote {
|
|
||||||
assert!(self.is_any_string());
|
|
||||||
self.flags.quote_style()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Ranged for Token {
|
|
||||||
fn range(&self) -> TextRange {
|
|
||||||
self.range
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Represents an error that occur during lexing and are
|
|
||||||
/// returned by the `parse_*` functions in the iterator in the
|
|
||||||
/// [lexer] implementation.
|
|
||||||
///
|
|
||||||
/// [lexer]: crate::lexer
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub struct LexicalError {
|
|
||||||
/// The type of error that occurred.
|
|
||||||
error: LexicalErrorType,
|
|
||||||
/// The location of the error.
|
|
||||||
location: TextRange,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl LexicalError {
|
|
||||||
/// Creates a new `LexicalError` with the given error type and location.
|
|
||||||
pub fn new(error: LexicalErrorType, location: TextRange) -> Self {
|
|
||||||
Self { error, location }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn error(&self) -> &LexicalErrorType {
|
|
||||||
&self.error
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn into_error(self) -> LexicalErrorType {
|
|
||||||
self.error
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn location(&self) -> TextRange {
|
|
||||||
self.location
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::ops::Deref for LexicalError {
|
|
||||||
type Target = LexicalErrorType;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
self.error()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for LexicalError {
|
|
||||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
|
||||||
Some(self.error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for LexicalError {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"{} at byte offset {}",
|
|
||||||
self.error(),
|
|
||||||
u32::from(self.location().start())
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Represents the different types of errors that can occur during lexing.
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub enum LexicalErrorType {
|
|
||||||
// TODO: Can probably be removed, the places it is used seem to be able
|
|
||||||
// to use the `UnicodeError` variant instead.
|
|
||||||
#[doc(hidden)]
|
|
||||||
StringError,
|
|
||||||
/// A string literal without the closing quote.
|
|
||||||
UnclosedStringError,
|
|
||||||
/// Decoding of a unicode escape sequence in a string literal failed.
|
|
||||||
UnicodeError,
|
|
||||||
/// Missing the `{` for unicode escape sequence.
|
|
||||||
MissingUnicodeLbrace,
|
|
||||||
/// Missing the `}` for unicode escape sequence.
|
|
||||||
MissingUnicodeRbrace,
|
|
||||||
/// The indentation is not consistent.
|
|
||||||
IndentationError,
|
|
||||||
/// An unrecognized token was encountered.
|
|
||||||
UnrecognizedToken { tok: char },
|
|
||||||
/// An f-string error containing the [`FStringErrorType`].
|
|
||||||
FStringError(FStringErrorType),
|
|
||||||
/// Invalid character encountered in a byte literal.
|
|
||||||
InvalidByteLiteral,
|
|
||||||
/// An unexpected character was encountered after a line continuation.
|
|
||||||
LineContinuationError,
|
|
||||||
/// An unexpected end of file was encountered.
|
|
||||||
Eof,
|
|
||||||
/// An unexpected error occurred.
|
|
||||||
OtherError(Box<str>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::error::Error for LexicalErrorType {}
|
|
||||||
|
|
||||||
impl std::fmt::Display for LexicalErrorType {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
LexicalErrorType::StringError => write!(f, "Got unexpected string"),
|
|
||||||
LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
|
|
||||||
LexicalErrorType::InvalidByteLiteral => {
|
|
||||||
write!(f, "bytes can only contain ASCII literal characters")
|
|
||||||
}
|
|
||||||
LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
|
|
||||||
LexicalErrorType::IndentationError => {
|
|
||||||
write!(f, "unindent does not match any outer indentation level")
|
|
||||||
}
|
|
||||||
LexicalErrorType::UnrecognizedToken { tok } => {
|
|
||||||
write!(f, "Got unexpected token {tok}")
|
|
||||||
}
|
|
||||||
LexicalErrorType::LineContinuationError => {
|
|
||||||
write!(f, "unexpected character after line continuation character")
|
|
||||||
}
|
|
||||||
LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
|
|
||||||
LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
|
|
||||||
LexicalErrorType::UnclosedStringError => {
|
|
||||||
write!(f, "missing closing quote in string literal")
|
|
||||||
}
|
|
||||||
LexicalErrorType::MissingUnicodeLbrace => {
|
|
||||||
write!(f, "Missing `{{` in Unicode escape sequence")
|
|
||||||
}
|
|
||||||
LexicalErrorType::MissingUnicodeRbrace => {
|
|
||||||
write!(f, "Missing `}}` in Unicode escape sequence")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
pub(crate) enum TokenValue {
|
|
||||||
#[default]
|
|
||||||
None,
|
|
||||||
/// Token value for a name, commonly known as an identifier.
|
|
||||||
///
|
|
||||||
/// Unicode names are NFKC-normalized by the lexer,
|
|
||||||
/// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers)
|
|
||||||
Name(Box<str>),
|
|
||||||
/// Token value for an integer.
|
|
||||||
Int(Int),
|
|
||||||
/// Token value for a floating point number.
|
|
||||||
Float(f64),
|
|
||||||
/// Token value for a complex number.
|
|
||||||
Complex {
|
|
||||||
/// The real part of the complex number.
|
|
||||||
real: f64,
|
|
||||||
/// The imaginary part of the complex number.
|
|
||||||
imag: f64,
|
|
||||||
},
|
|
||||||
/// Token value for a string.
|
|
||||||
String(Box<str>),
|
|
||||||
/// Token value that includes the portion of text inside the f-string that's not
|
|
||||||
/// part of the expression part and isn't an opening or closing brace.
|
|
||||||
FStringMiddle(Box<str>),
|
|
||||||
/// Token value for IPython escape commands. These are recognized by the lexer
|
|
||||||
/// only when the mode is [`Mode::Ipython`].
|
|
||||||
IpyEscapeCommand {
|
|
||||||
/// The magic command value.
|
|
||||||
value: Box<str>,
|
|
||||||
/// The kind of magic command.
|
|
||||||
kind: IpyEscapeKind,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) struct LexerCheckpoint {
|
pub(crate) struct LexerCheckpoint {
|
||||||
value: TokenValue,
|
value: TokenValue,
|
||||||
current_kind: TokenKind,
|
current_kind: TokenKind,
|
||||||
|
|
|
@ -67,8 +67,7 @@
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
pub use crate::error::{FStringErrorType, ParseError, ParseErrorType};
|
pub use crate::error::{FStringErrorType, ParseError, ParseErrorType};
|
||||||
pub use crate::lexer::Token;
|
pub use crate::token::{Token, TokenKind};
|
||||||
pub use crate::token::TokenKind;
|
|
||||||
|
|
||||||
use crate::parser::Parser;
|
use crate::parser::Parser;
|
||||||
|
|
||||||
|
@ -592,7 +591,7 @@ impl std::fmt::Display for ModeParseError {
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
|
||||||
use crate::lexer::TokenFlags;
|
use crate::token::TokenFlags;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
|
|
@ -11,12 +11,12 @@ use ruff_python_ast::{
|
||||||
};
|
};
|
||||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::lexer::TokenValue;
|
|
||||||
use crate::parser::progress::ParserProgress;
|
use crate::parser::progress::ParserProgress;
|
||||||
use crate::parser::{helpers, FunctionKind, Parser};
|
use crate::parser::{helpers, FunctionKind, Parser};
|
||||||
use crate::string::{parse_fstring_literal_element, parse_string_literal, StringType};
|
use crate::string::{parse_fstring_literal_element, parse_string_literal, StringType};
|
||||||
|
use crate::token::{TokenKind, TokenValue};
|
||||||
use crate::token_set::TokenSet;
|
use crate::token_set::TokenSet;
|
||||||
use crate::{FStringErrorType, Mode, ParseErrorType, TokenKind};
|
use crate::{FStringErrorType, Mode, ParseErrorType};
|
||||||
|
|
||||||
use super::{FStringElementsKind, Parenthesized, RecoveryContextKind};
|
use super::{FStringElementsKind, Parenthesized, RecoveryContextKind};
|
||||||
|
|
||||||
|
|
|
@ -5,9 +5,9 @@ use bitflags::bitflags;
|
||||||
use ruff_python_ast::{Mod, ModExpression, ModModule};
|
use ruff_python_ast::{Mod, ModExpression, ModModule};
|
||||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::lexer::TokenValue;
|
|
||||||
use crate::parser::expression::ExpressionContext;
|
use crate::parser::expression::ExpressionContext;
|
||||||
use crate::parser::progress::{ParserProgress, TokenId};
|
use crate::parser::progress::{ParserProgress, TokenId};
|
||||||
|
use crate::token::TokenValue;
|
||||||
use crate::token_set::TokenSet;
|
use crate::token_set::TokenSet;
|
||||||
use crate::token_source::{TokenSource, TokenSourceCheckpoint};
|
use crate::token_source::{TokenSource, TokenSourceCheckpoint};
|
||||||
use crate::{Mode, ParseError, ParseErrorType, TokenKind};
|
use crate::{Mode, ParseError, ParseErrorType, TokenKind};
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
use ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton};
|
use ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton};
|
||||||
use ruff_text_size::{Ranged, TextSize};
|
use ruff_text_size::{Ranged, TextSize};
|
||||||
|
|
||||||
use crate::lexer::TokenValue;
|
|
||||||
use crate::parser::progress::ParserProgress;
|
use crate::parser::progress::ParserProgress;
|
||||||
use crate::parser::{recovery, Parser, RecoveryContextKind, SequenceMatchPatternParentheses};
|
use crate::parser::{recovery, Parser, RecoveryContextKind, SequenceMatchPatternParentheses};
|
||||||
|
use crate::token::{TokenKind, TokenValue};
|
||||||
use crate::token_set::TokenSet;
|
use crate::token_set::TokenSet;
|
||||||
use crate::{ParseErrorType, TokenKind};
|
use crate::ParseErrorType;
|
||||||
|
|
||||||
use super::expression::ExpressionContext;
|
use super::expression::ExpressionContext;
|
||||||
|
|
||||||
|
|
|
@ -8,14 +8,14 @@ use ruff_python_ast::{
|
||||||
};
|
};
|
||||||
use ruff_text_size::{Ranged, TextSize};
|
use ruff_text_size::{Ranged, TextSize};
|
||||||
|
|
||||||
use crate::lexer::TokenValue;
|
|
||||||
use crate::parser::expression::{ParsedExpr, EXPR_SET};
|
use crate::parser::expression::{ParsedExpr, EXPR_SET};
|
||||||
use crate::parser::progress::ParserProgress;
|
use crate::parser::progress::ParserProgress;
|
||||||
use crate::parser::{
|
use crate::parser::{
|
||||||
helpers, FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind,
|
helpers, FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind,
|
||||||
};
|
};
|
||||||
|
use crate::token::{TokenKind, TokenValue};
|
||||||
use crate::token_set::TokenSet;
|
use crate::token_set::TokenSet;
|
||||||
use crate::{Mode, ParseErrorType, TokenKind};
|
use crate::{Mode, ParseErrorType};
|
||||||
|
|
||||||
use super::expression::ExpressionContext;
|
use super::expression::ExpressionContext;
|
||||||
use super::Parenthesized;
|
use super::Parenthesized;
|
||||||
|
|
|
@ -5,7 +5,7 @@ use bstr::ByteSlice;
|
||||||
use ruff_python_ast::{self as ast, AnyStringFlags, Expr, StringFlags};
|
use ruff_python_ast::{self as ast, AnyStringFlags, Expr, StringFlags};
|
||||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
use crate::error::{LexicalError, LexicalErrorType};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) enum StringType {
|
pub(crate) enum StringType {
|
||||||
|
@ -471,7 +471,7 @@ pub(crate) fn parse_fstring_literal_element(
|
||||||
mod tests {
|
mod tests {
|
||||||
use ruff_python_ast::Suite;
|
use ruff_python_ast::Suite;
|
||||||
|
|
||||||
use crate::lexer::LexicalErrorType;
|
use crate::error::LexicalErrorType;
|
||||||
use crate::{parse_module, FStringErrorType, ParseError, ParseErrorType, Parsed};
|
use crate::{parse_module, FStringErrorType, ParseError, ParseErrorType, Parsed};
|
||||||
|
|
||||||
const WINDOWS_EOL: &str = "\r\n";
|
const WINDOWS_EOL: &str = "\r\n";
|
||||||
|
|
|
@ -7,7 +7,85 @@
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use ruff_python_ast::{BoolOp, Operator, UnaryOp};
|
use bitflags::bitflags;
|
||||||
|
|
||||||
|
use ruff_python_ast::str::Quote;
|
||||||
|
use ruff_python_ast::str_prefix::{
|
||||||
|
AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
|
||||||
|
};
|
||||||
|
use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp};
|
||||||
|
use ruff_text_size::{Ranged, TextRange};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct Token {
|
||||||
|
/// The kind of the token.
|
||||||
|
kind: TokenKind,
|
||||||
|
/// The range of the token.
|
||||||
|
range: TextRange,
|
||||||
|
/// The set of flags describing this token.
|
||||||
|
flags: TokenFlags,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Token {
|
||||||
|
pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
|
||||||
|
Self { kind, range, flags }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the token kind.
|
||||||
|
#[inline]
|
||||||
|
pub const fn kind(&self) -> TokenKind {
|
||||||
|
self.kind
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the token as a tuple of (kind, range).
|
||||||
|
#[inline]
|
||||||
|
pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
|
||||||
|
(self.kind, self.range)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this is a trivia token.
|
||||||
|
#[inline]
|
||||||
|
pub const fn is_trivia(self) -> bool {
|
||||||
|
matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if the current token is a triple-quoted string of any kind.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// If it isn't a string or any f-string tokens.
|
||||||
|
pub fn is_triple_quoted_string(self) -> bool {
|
||||||
|
assert!(self.is_any_string());
|
||||||
|
self.flags.is_triple_quoted()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the [`Quote`] style for the current string token of any kind.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// If it isn't a string or any f-string tokens.
|
||||||
|
pub fn string_quote_style(self) -> Quote {
|
||||||
|
assert!(self.is_any_string());
|
||||||
|
self.flags.quote_style()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this is any kind of string token.
|
||||||
|
const fn is_any_string(self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self.kind,
|
||||||
|
TokenKind::String
|
||||||
|
| TokenKind::FStringStart
|
||||||
|
| TokenKind::FStringMiddle
|
||||||
|
| TokenKind::FStringEnd
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ranged for Token {
|
||||||
|
fn range(&self) -> TextRange {
|
||||||
|
self.range
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A kind of a token.
|
/// A kind of a token.
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
|
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
|
||||||
|
@ -591,11 +669,126 @@ impl fmt::Display for TokenKind {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_pointer_width = "64")]
|
bitflags! {
|
||||||
mod sizes {
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
pub(crate) struct TokenFlags: u8 {
|
||||||
use static_assertions::assert_eq_size;
|
/// The token is a string with double quotes (`"`).
|
||||||
|
const DOUBLE_QUOTES = 1 << 0;
|
||||||
|
/// The token is a triple-quoted string i.e., it starts and ends with three consecutive
|
||||||
|
/// quote characters (`"""` or `'''`).
|
||||||
|
const TRIPLE_QUOTED_STRING = 1 << 1;
|
||||||
|
|
||||||
assert_eq_size!(LexicalErrorType, [u8; 24]);
|
/// The token is a unicode string i.e., prefixed with `u` or `U`
|
||||||
assert_eq_size!(LexicalError, [u8; 32]);
|
const UNICODE_STRING = 1 << 2;
|
||||||
|
/// The token is a byte string i.e., prefixed with `b` or `B`
|
||||||
|
const BYTE_STRING = 1 << 3;
|
||||||
|
/// The token is an f-string i.e., prefixed with `f` or `F`
|
||||||
|
const F_STRING = 1 << 4;
|
||||||
|
/// The token is a raw string and the prefix character is in lowercase.
|
||||||
|
const RAW_STRING_LOWERCASE = 1 << 5;
|
||||||
|
/// The token is a raw string and the prefix character is in uppercase.
|
||||||
|
const RAW_STRING_UPPERCASE = 1 << 6;
|
||||||
|
|
||||||
|
/// The token is a raw string i.e., prefixed with `r` or `R`
|
||||||
|
const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StringFlags for TokenFlags {
|
||||||
|
fn quote_style(self) -> Quote {
|
||||||
|
if self.intersects(TokenFlags::DOUBLE_QUOTES) {
|
||||||
|
Quote::Double
|
||||||
|
} else {
|
||||||
|
Quote::Single
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_triple_quoted(self) -> bool {
|
||||||
|
self.intersects(TokenFlags::TRIPLE_QUOTED_STRING)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prefix(self) -> AnyStringPrefix {
|
||||||
|
if self.intersects(TokenFlags::F_STRING) {
|
||||||
|
if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
||||||
|
AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
|
||||||
|
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
||||||
|
AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
|
||||||
|
} else {
|
||||||
|
AnyStringPrefix::Format(FStringPrefix::Regular)
|
||||||
|
}
|
||||||
|
} else if self.intersects(TokenFlags::BYTE_STRING) {
|
||||||
|
if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
||||||
|
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
|
||||||
|
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
||||||
|
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
|
||||||
|
} else {
|
||||||
|
AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
|
||||||
|
}
|
||||||
|
} else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
||||||
|
AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
|
||||||
|
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
||||||
|
AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
|
||||||
|
} else if self.intersects(TokenFlags::UNICODE_STRING) {
|
||||||
|
AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
|
||||||
|
} else {
|
||||||
|
AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenFlags {
|
||||||
|
/// Returns `true` if the token is an f-string.
|
||||||
|
pub(crate) const fn is_f_string(self) -> bool {
|
||||||
|
self.intersects(TokenFlags::F_STRING)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if the token is a triple-quoted f-string.
|
||||||
|
pub(crate) fn is_triple_quoted_fstring(self) -> bool {
|
||||||
|
self.contains(TokenFlags::F_STRING | TokenFlags::TRIPLE_QUOTED_STRING)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if the token is a raw string.
|
||||||
|
pub(crate) const fn is_raw_string(self) -> bool {
|
||||||
|
self.intersects(TokenFlags::RAW_STRING)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts this type to [`AnyStringFlags`], setting the equivalent flags.
|
||||||
|
pub(crate) fn as_any_string_flags(self) -> AnyStringFlags {
|
||||||
|
AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub(crate) enum TokenValue {
|
||||||
|
#[default]
|
||||||
|
None,
|
||||||
|
/// Token value for a name, commonly known as an identifier.
|
||||||
|
///
|
||||||
|
/// Unicode names are NFKC-normalized by the lexer,
|
||||||
|
/// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers)
|
||||||
|
Name(Box<str>),
|
||||||
|
/// Token value for an integer.
|
||||||
|
Int(Int),
|
||||||
|
/// Token value for a floating point number.
|
||||||
|
Float(f64),
|
||||||
|
/// Token value for a complex number.
|
||||||
|
Complex {
|
||||||
|
/// The real part of the complex number.
|
||||||
|
real: f64,
|
||||||
|
/// The imaginary part of the complex number.
|
||||||
|
imag: f64,
|
||||||
|
},
|
||||||
|
/// Token value for a string.
|
||||||
|
String(Box<str>),
|
||||||
|
/// Token value that includes the portion of text inside the f-string that's not
|
||||||
|
/// part of the expression part and isn't an opening or closing brace.
|
||||||
|
FStringMiddle(Box<str>),
|
||||||
|
/// Token value for IPython escape commands. These are recognized by the lexer
|
||||||
|
/// only when the mode is [`Mode::Ipython`].
|
||||||
|
IpyEscapeCommand {
|
||||||
|
/// The magic command value.
|
||||||
|
value: Box<str>,
|
||||||
|
/// The kind of magic command.
|
||||||
|
kind: IpyEscapeKind,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
|
use crate::error::LexicalError;
|
||||||
use crate::{Mode, TokenKind};
|
use crate::lexer::{Lexer, LexerCheckpoint};
|
||||||
|
use crate::token::{Token, TokenFlags, TokenKind, TokenValue};
|
||||||
|
use crate::Mode;
|
||||||
|
|
||||||
/// Token source for the parser that skips over any trivia tokens.
|
/// Token source for the parser that skips over any trivia tokens.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue