Move token and error structs into related modules (#11957)

## Summary

This PR does some housekeeping by moving certain structs into the modules they relate to. Specifically:

1. Move `LexicalError` (together with `LexicalErrorType`) from `lexer.rs` to `error.rs`, which also contains `ParseError`.
2. Move `Token`, `TokenFlags` and `TokenValue` from `lexer.rs` to `token.rs`.
2025-09-26 11:59:35 +00:00 · 2024-06-21 15:37:19 +05:30 · 2024-06-21 15:37:19 +05:30 · 96da136e6a
commit 96da136e6a
parent 4667d8697c
10 changed files with 352 additions and 342 deletions
--- a/crates/ruff_python_parser/src/error.rs
+++ b/crates/ruff_python_parser/src/error.rs
@ -2,7 +2,6 @@ use std::fmt;

 use ruff_text_size::TextRange;

-use crate::lexer::{LexicalError, LexicalErrorType};
 use crate::TokenKind;

 /// Represents represent errors that occur during parsing and are
@ -295,3 +294,135 @@ impl std::fmt::Display for ParseErrorType {
        }
    }
 }
+
+/// Represents an error that occurs during lexing and is
+/// returned by the `parse_*` functions in the iterator in the
+/// [lexer] implementation.
+///
+/// [lexer]: crate::lexer
+#[derive(Debug, Clone, PartialEq)]
+pub struct LexicalError {
+    /// The type of error that occurred.
+    error: LexicalErrorType,
+    /// The location of the error.
+    location: TextRange,
+}
+
+impl LexicalError {
+    /// Creates a new `LexicalError` with the given error type and location.
+    pub fn new(error: LexicalErrorType, location: TextRange) -> Self {
+        Self { error, location }
+    }
+
+    pub fn error(&self) -> &LexicalErrorType {
+        &self.error
+    }
+
+    pub fn into_error(self) -> LexicalErrorType {
+        self.error
+    }
+
+    pub fn location(&self) -> TextRange {
+        self.location
+    }
+}
+
+impl std::ops::Deref for LexicalError {
+    type Target = LexicalErrorType;
+
+    fn deref(&self) -> &Self::Target {
+        self.error()
+    }
+}
+
+impl std::error::Error for LexicalError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        Some(self.error())
+    }
+}
+
+impl std::fmt::Display for LexicalError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "{} at byte offset {}",
+            self.error(),
+            u32::from(self.location().start())
+        )
+    }
+}
+
+/// Represents the different types of errors that can occur during lexing.
+#[derive(Debug, Clone, PartialEq)]
+pub enum LexicalErrorType {
+    // TODO: Can probably be removed, the places it is used seem to be able
+    // to use the `UnicodeError` variant instead.
+    #[doc(hidden)]
+    StringError,
+    /// A string literal without the closing quote.
+    UnclosedStringError,
+    /// Decoding of a unicode escape sequence in a string literal failed.
+    UnicodeError,
+    /// Missing the `{` for unicode escape sequence.
+    MissingUnicodeLbrace,
+    /// Missing the `}` for unicode escape sequence.
+    MissingUnicodeRbrace,
+    /// The indentation is not consistent.
+    IndentationError,
+    /// An unrecognized token was encountered.
+    UnrecognizedToken { tok: char },
+    /// An f-string error containing the [`FStringErrorType`].
+    FStringError(FStringErrorType),
+    /// Invalid character encountered in a byte literal.
+    InvalidByteLiteral,
+    /// An unexpected character was encountered after a line continuation.
+    LineContinuationError,
+    /// An unexpected end of file was encountered.
+    Eof,
+    /// An unexpected error occurred.
+    OtherError(Box<str>),
+}
+
+impl std::error::Error for LexicalErrorType {}
+
+impl std::fmt::Display for LexicalErrorType {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
+            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
+            LexicalErrorType::InvalidByteLiteral => {
+                write!(f, "bytes can only contain ASCII literal characters")
+            }
+            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
+            LexicalErrorType::IndentationError => {
+                write!(f, "unindent does not match any outer indentation level")
+            }
+            LexicalErrorType::UnrecognizedToken { tok } => {
+                write!(f, "Got unexpected token {tok}")
+            }
+            LexicalErrorType::LineContinuationError => {
+                write!(f, "unexpected character after line continuation character")
+            }
+            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
+            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
+            LexicalErrorType::UnclosedStringError => {
+                write!(f, "missing closing quote in string literal")
+            }
+            LexicalErrorType::MissingUnicodeLbrace => {
+                write!(f, "Missing `{{` in Unicode escape sequence")
+            }
+            LexicalErrorType::MissingUnicodeRbrace => {
+                write!(f, "Missing `}}` in Unicode escape sequence")
+            }
+        }
+    }
+}
+
+#[cfg(target_pointer_width = "64")]
+mod sizes {
+    use crate::error::{LexicalError, LexicalErrorType};
+    use static_assertions::assert_eq_size;
+
+    assert_eq_size!(LexicalErrorType, [u8; 24]);
+    assert_eq_size!(LexicalError, [u8; 32]);
+}
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@ -9,23 +9,19 @@
 use std::cmp::Ordering;
 use std::str::FromStr;

-use bitflags::bitflags;
 use unicode_ident::{is_xid_continue, is_xid_start};
 use unicode_normalization::UnicodeNormalization;

-use ruff_python_ast::str::Quote;
-use ruff_python_ast::str_prefix::{
-    AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
-};
-use ruff_python_ast::{AnyStringFlags, Int, IpyEscapeKind, StringFlags};
+use ruff_python_ast::{Int, IpyEscapeKind, StringFlags};
 use ruff_python_trivia::is_python_whitespace;
-use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
+use ruff_text_size::{TextLen, TextRange, TextSize};

-use crate::error::FStringErrorType;
+use crate::error::{FStringErrorType, LexicalError, LexicalErrorType};
 use crate::lexer::cursor::{Cursor, EOF_CHAR};
 use crate::lexer::fstring::{FStringContext, FStrings, FStringsCheckpoint};
 use crate::lexer::indentation::{Indentation, Indentations, IndentationsCheckpoint};
-use crate::{Mode, TokenKind};
+use crate::token::{TokenFlags, TokenKind, TokenValue};
+use crate::Mode;

 mod cursor;
 mod fstring;
@ -1511,317 +1507,6 @@ impl<'src> Lexer<'src> {
    }
 }

-bitflags! {
-    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
-    pub(crate) struct TokenFlags: u8 {
-        /// The token is a string with double quotes (`"`).
-        const DOUBLE_QUOTES = 1 << 0;
-        /// The token is a triple-quoted string i.e., it starts and ends with three consecutive
-        /// quote characters (`"""` or `'''`).
-        const TRIPLE_QUOTED_STRING = 1 << 1;
-
-        /// The token is a unicode string i.e., prefixed with `u` or `U`
-        const UNICODE_STRING = 1 << 2;
-        /// The token is a byte string i.e., prefixed with `b` or `B`
-        const BYTE_STRING = 1 << 3;
-        /// The token is an f-string i.e., prefixed with `f` or `F`
-        const F_STRING = 1 << 4;
-        /// The token is a raw string and the prefix character is in lowercase.
-        const RAW_STRING_LOWERCASE = 1 << 5;
-        /// The token is a raw string and the prefix character is in uppercase.
-        const RAW_STRING_UPPERCASE = 1 << 6;
-
-        /// The token is a raw string i.e., prefixed with `r` or `R`
-        const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
-    }
-}
-
-impl StringFlags for TokenFlags {
-    fn quote_style(self) -> Quote {
-        if self.intersects(TokenFlags::DOUBLE_QUOTES) {
-            Quote::Double
-        } else {
-            Quote::Single
-        }
-    }
-
-    fn is_triple_quoted(self) -> bool {
-        self.intersects(TokenFlags::TRIPLE_QUOTED_STRING)
-    }
-
-    fn prefix(self) -> AnyStringPrefix {
-        if self.intersects(TokenFlags::F_STRING) {
-            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
-            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
-            } else {
-                AnyStringPrefix::Format(FStringPrefix::Regular)
-            }
-        } else if self.intersects(TokenFlags::BYTE_STRING) {
-            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
-            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
-            } else {
-                AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
-            }
-        } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
-        } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
-        } else if self.intersects(TokenFlags::UNICODE_STRING) {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
-        } else {
-            AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
-        }
-    }
-}
-
-impl TokenFlags {
-    /// Returns `true` if the token is an f-string.
-    const fn is_f_string(self) -> bool {
-        self.intersects(TokenFlags::F_STRING)
-    }
-
-    /// Returns `true` if the token is a triple-quoted f-string.
-    fn is_triple_quoted_fstring(self) -> bool {
-        self.contains(TokenFlags::F_STRING | TokenFlags::TRIPLE_QUOTED_STRING)
-    }
-
-    /// Returns `true` if the token is a raw string.
-    const fn is_raw_string(self) -> bool {
-        self.intersects(TokenFlags::RAW_STRING)
-    }
-
-    pub(crate) fn as_any_string_flags(self) -> AnyStringFlags {
-        AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted())
-    }
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub struct Token {
-    /// The kind of the token.
-    kind: TokenKind,
-    /// The range of the token.
-    range: TextRange,
-    /// The set of flags describing this token.
-    flags: TokenFlags,
-}
-
-impl Token {
-    pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
-        Self { kind, range, flags }
-    }
-
-    /// Returns the token kind.
-    #[inline]
-    pub const fn kind(&self) -> TokenKind {
-        self.kind
-    }
-
-    /// Returns the token as a tuple of (kind, range).
-    #[inline]
-    pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
-        (self.kind, self.range)
-    }
-
-    /// Returns `true` if this is any kind of string token.
-    const fn is_any_string(self) -> bool {
-        matches!(
-            self.kind,
-            TokenKind::String
-                | TokenKind::FStringStart
-                | TokenKind::FStringMiddle
-                | TokenKind::FStringEnd
-        )
-    }
-
-    /// Returns `true` if the current token is a triple-quoted string of any kind.
-    ///
-    /// # Panics
-    ///
-    /// If it isn't a string or any f-string tokens.
-    pub fn is_triple_quoted_string(self) -> bool {
-        assert!(self.is_any_string());
-        self.flags.is_triple_quoted()
-    }
-
-    /// Returns the [`Quote`] style for the current string token of any kind.
-    ///
-    /// # Panics
-    ///
-    /// If it isn't a string or any f-string tokens.
-    pub fn string_quote_style(self) -> Quote {
-        assert!(self.is_any_string());
-        self.flags.quote_style()
-    }
-}
-
-impl Ranged for Token {
-    fn range(&self) -> TextRange {
-        self.range
-    }
-}
-
-/// Represents an error that occur during lexing and are
-/// returned by the `parse_*` functions in the iterator in the
-/// [lexer] implementation.
-///
-/// [lexer]: crate::lexer
-#[derive(Debug, Clone, PartialEq)]
-pub struct LexicalError {
-    /// The type of error that occurred.
-    error: LexicalErrorType,
-    /// The location of the error.
-    location: TextRange,
-}
-
-impl LexicalError {
-    /// Creates a new `LexicalError` with the given error type and location.
-    pub fn new(error: LexicalErrorType, location: TextRange) -> Self {
-        Self { error, location }
-    }
-
-    pub fn error(&self) -> &LexicalErrorType {
-        &self.error
-    }
-
-    pub fn into_error(self) -> LexicalErrorType {
-        self.error
-    }
-
-    pub fn location(&self) -> TextRange {
-        self.location
-    }
-}
-
-impl std::ops::Deref for LexicalError {
-    type Target = LexicalErrorType;
-
-    fn deref(&self) -> &Self::Target {
-        self.error()
-    }
-}
-
-impl std::error::Error for LexicalError {
-    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
-        Some(self.error())
-    }
-}
-
-impl std::fmt::Display for LexicalError {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(
-            f,
-            "{} at byte offset {}",
-            self.error(),
-            u32::from(self.location().start())
-        )
-    }
-}
-
-/// Represents the different types of errors that can occur during lexing.
-#[derive(Debug, Clone, PartialEq)]
-pub enum LexicalErrorType {
-    // TODO: Can probably be removed, the places it is used seem to be able
-    // to use the `UnicodeError` variant instead.
-    #[doc(hidden)]
-    StringError,
-    /// A string literal without the closing quote.
-    UnclosedStringError,
-    /// Decoding of a unicode escape sequence in a string literal failed.
-    UnicodeError,
-    /// Missing the `{` for unicode escape sequence.
-    MissingUnicodeLbrace,
-    /// Missing the `}` for unicode escape sequence.
-    MissingUnicodeRbrace,
-    /// The indentation is not consistent.
-    IndentationError,
-    /// An unrecognized token was encountered.
-    UnrecognizedToken { tok: char },
-    /// An f-string error containing the [`FStringErrorType`].
-    FStringError(FStringErrorType),
-    /// Invalid character encountered in a byte literal.
-    InvalidByteLiteral,
-    /// An unexpected character was encountered after a line continuation.
-    LineContinuationError,
-    /// An unexpected end of file was encountered.
-    Eof,
-    /// An unexpected error occurred.
-    OtherError(Box<str>),
-}
-
-impl std::error::Error for LexicalErrorType {}
-
-impl std::fmt::Display for LexicalErrorType {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        match self {
-            LexicalErrorType::StringError => write!(f, "Got unexpected string"),
-            LexicalErrorType::FStringError(error) => write!(f, "f-string: {error}"),
-            LexicalErrorType::InvalidByteLiteral => {
-                write!(f, "bytes can only contain ASCII literal characters")
-            }
-            LexicalErrorType::UnicodeError => write!(f, "Got unexpected unicode"),
-            LexicalErrorType::IndentationError => {
-                write!(f, "unindent does not match any outer indentation level")
-            }
-            LexicalErrorType::UnrecognizedToken { tok } => {
-                write!(f, "Got unexpected token {tok}")
-            }
-            LexicalErrorType::LineContinuationError => {
-                write!(f, "unexpected character after line continuation character")
-            }
-            LexicalErrorType::Eof => write!(f, "unexpected EOF while parsing"),
-            LexicalErrorType::OtherError(msg) => write!(f, "{msg}"),
-            LexicalErrorType::UnclosedStringError => {
-                write!(f, "missing closing quote in string literal")
-            }
-            LexicalErrorType::MissingUnicodeLbrace => {
-                write!(f, "Missing `{{` in Unicode escape sequence")
-            }
-            LexicalErrorType::MissingUnicodeRbrace => {
-                write!(f, "Missing `}}` in Unicode escape sequence")
-            }
-        }
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-pub(crate) enum TokenValue {
-    #[default]
-    None,
-    /// Token value for a name, commonly known as an identifier.
-    ///
-    /// Unicode names are NFKC-normalized by the lexer,
-    /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers)
-    Name(Box<str>),
-    /// Token value for an integer.
-    Int(Int),
-    /// Token value for a floating point number.
-    Float(f64),
-    /// Token value for a complex number.
-    Complex {
-        /// The real part of the complex number.
-        real: f64,
-        /// The imaginary part of the complex number.
-        imag: f64,
-    },
-    /// Token value for a string.
-    String(Box<str>),
-    /// Token value that includes the portion of text inside the f-string that's not
-    /// part of the expression part and isn't an opening or closing brace.
-    FStringMiddle(Box<str>),
-    /// Token value for IPython escape commands. These are recognized by the lexer
-    /// only when the mode is [`Mode::Ipython`].
-    IpyEscapeCommand {
-        /// The magic command value.
-        value: Box<str>,
-        /// The kind of magic command.
-        kind: IpyEscapeKind,
-    },
-}
-
 pub(crate) struct LexerCheckpoint {
    value: TokenValue,
    current_kind: TokenKind,
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@ -67,8 +67,7 @@
 use std::ops::Deref;

 pub use crate::error::{FStringErrorType, ParseError, ParseErrorType};
-pub use crate::lexer::Token;
-pub use crate::token::TokenKind;
+pub use crate::token::{Token, TokenKind};

 use crate::parser::Parser;

@ -592,7 +591,7 @@ impl std::fmt::Display for ModeParseError {
 mod tests {
    use std::ops::Range;

-    use crate::lexer::TokenFlags;
+    use crate::token::TokenFlags;

    use super::*;

--- a/crates/ruff_python_parser/src/parser/expression.rs
+++ b/crates/ruff_python_parser/src/parser/expression.rs
@ -11,12 +11,12 @@ use ruff_python_ast::{
 };
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};

-use crate::lexer::TokenValue;
 use crate::parser::progress::ParserProgress;
 use crate::parser::{helpers, FunctionKind, Parser};
 use crate::string::{parse_fstring_literal_element, parse_string_literal, StringType};
+use crate::token::{TokenKind, TokenValue};
 use crate::token_set::TokenSet;
-use crate::{FStringErrorType, Mode, ParseErrorType, TokenKind};
+use crate::{FStringErrorType, Mode, ParseErrorType};

 use super::{FStringElementsKind, Parenthesized, RecoveryContextKind};

--- a/crates/ruff_python_parser/src/parser/mod.rs
+++ b/crates/ruff_python_parser/src/parser/mod.rs
@ -5,9 +5,9 @@ use bitflags::bitflags;
 use ruff_python_ast::{Mod, ModExpression, ModModule};
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use crate::lexer::TokenValue;
 use crate::parser::expression::ExpressionContext;
 use crate::parser::progress::{ParserProgress, TokenId};
+use crate::token::TokenValue;
 use crate::token_set::TokenSet;
 use crate::token_source::{TokenSource, TokenSourceCheckpoint};
 use crate::{Mode, ParseError, ParseErrorType, TokenKind};
--- a/crates/ruff_python_parser/src/parser/pattern.rs
+++ b/crates/ruff_python_parser/src/parser/pattern.rs
@ -1,11 +1,11 @@
 use ruff_python_ast::{self as ast, Expr, ExprContext, Number, Operator, Pattern, Singleton};
 use ruff_text_size::{Ranged, TextSize};

-use crate::lexer::TokenValue;
 use crate::parser::progress::ParserProgress;
 use crate::parser::{recovery, Parser, RecoveryContextKind, SequenceMatchPatternParentheses};
+use crate::token::{TokenKind, TokenValue};
 use crate::token_set::TokenSet;
-use crate::{ParseErrorType, TokenKind};
+use crate::ParseErrorType;

 use super::expression::ExpressionContext;

--- a/crates/ruff_python_parser/src/parser/statement.rs
+++ b/crates/ruff_python_parser/src/parser/statement.rs
@ -8,14 +8,14 @@ use ruff_python_ast::{
 };
 use ruff_text_size::{Ranged, TextSize};

-use crate::lexer::TokenValue;
 use crate::parser::expression::{ParsedExpr, EXPR_SET};
 use crate::parser::progress::ParserProgress;
 use crate::parser::{
    helpers, FunctionKind, Parser, RecoveryContext, RecoveryContextKind, WithItemKind,
 };
+use crate::token::{TokenKind, TokenValue};
 use crate::token_set::TokenSet;
-use crate::{Mode, ParseErrorType, TokenKind};
+use crate::{Mode, ParseErrorType};

 use super::expression::ExpressionContext;
 use super::Parenthesized;
--- a/crates/ruff_python_parser/src/string.rs
+++ b/crates/ruff_python_parser/src/string.rs
@ -5,7 +5,7 @@ use bstr::ByteSlice;
 use ruff_python_ast::{self as ast, AnyStringFlags, Expr, StringFlags};
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use crate::lexer::{LexicalError, LexicalErrorType};
+use crate::error::{LexicalError, LexicalErrorType};

 #[derive(Debug)]
 pub(crate) enum StringType {
@ -471,7 +471,7 @@ pub(crate) fn parse_fstring_literal_element(
 mod tests {
    use ruff_python_ast::Suite;

-    use crate::lexer::LexicalErrorType;
+    use crate::error::LexicalErrorType;
    use crate::{parse_module, FStringErrorType, ParseError, ParseErrorType, Parsed};

    const WINDOWS_EOL: &str = "\r\n";
--- a/crates/ruff_python_parser/src/token.rs
+++ b/crates/ruff_python_parser/src/token.rs
@ -7,7 +7,85 @@

 use std::fmt;

-use ruff_python_ast::{BoolOp, Operator, UnaryOp};
+use bitflags::bitflags;
+
+use ruff_python_ast::str::Quote;
+use ruff_python_ast::str_prefix::{
+    AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
+};
+use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp};
+use ruff_text_size::{Ranged, TextRange};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Token {
+    /// The kind of the token.
+    kind: TokenKind,
+    /// The range of the token.
+    range: TextRange,
+    /// The set of flags describing this token.
+    flags: TokenFlags,
+}
+
+impl Token {
+    pub(crate) fn new(kind: TokenKind, range: TextRange, flags: TokenFlags) -> Token {
+        Self { kind, range, flags }
+    }
+
+    /// Returns the token kind.
+    #[inline]
+    pub const fn kind(&self) -> TokenKind {
+        self.kind
+    }
+
+    /// Returns the token as a tuple of (kind, range).
+    #[inline]
+    pub const fn as_tuple(&self) -> (TokenKind, TextRange) {
+        (self.kind, self.range)
+    }
+
+    /// Returns `true` if this is a trivia token.
+    #[inline]
+    pub const fn is_trivia(self) -> bool {
+        matches!(self.kind, TokenKind::Comment | TokenKind::NonLogicalNewline)
+    }
+
+    /// Returns `true` if the current token is a triple-quoted string of any kind.
+    ///
+    /// # Panics
+    ///
+    /// If it isn't a string or any f-string tokens.
+    pub fn is_triple_quoted_string(self) -> bool {
+        assert!(self.is_any_string());
+        self.flags.is_triple_quoted()
+    }
+
+    /// Returns the [`Quote`] style for the current string token of any kind.
+    ///
+    /// # Panics
+    ///
+    /// If it isn't a string or any f-string tokens.
+    pub fn string_quote_style(self) -> Quote {
+        assert!(self.is_any_string());
+        self.flags.quote_style()
+    }
+
+    /// Returns `true` if this is any kind of string token.
+    const fn is_any_string(self) -> bool {
+        matches!(
+            self.kind,
+            TokenKind::String
+                | TokenKind::FStringStart
+                | TokenKind::FStringMiddle
+                | TokenKind::FStringEnd
+        )
+    }
+}
+
+impl Ranged for Token {
+    fn range(&self) -> TextRange {
+        self.range
+    }
+}

 /// A kind of a token.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
@ -591,11 +669,126 @@ impl fmt::Display for TokenKind {
    }
 }

-#[cfg(target_pointer_width = "64")]
-mod sizes {
-    use crate::lexer::{LexicalError, LexicalErrorType};
-    use static_assertions::assert_eq_size;
+bitflags! {
+    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+    pub(crate) struct TokenFlags: u8 {
+        /// The token is a string with double quotes (`"`).
+        const DOUBLE_QUOTES = 1 << 0;
+        /// The token is a triple-quoted string i.e., it starts and ends with three consecutive
+        /// quote characters (`"""` or `'''`).
+        const TRIPLE_QUOTED_STRING = 1 << 1;

-    assert_eq_size!(LexicalErrorType, [u8; 24]);
-    assert_eq_size!(LexicalError, [u8; 32]);
+        /// The token is a unicode string i.e., prefixed with `u` or `U`
+        const UNICODE_STRING = 1 << 2;
+        /// The token is a byte string i.e., prefixed with `b` or `B`
+        const BYTE_STRING = 1 << 3;
+        /// The token is an f-string i.e., prefixed with `f` or `F`
+        const F_STRING = 1 << 4;
+        /// The token is a raw string and the prefix character is in lowercase.
+        const RAW_STRING_LOWERCASE = 1 << 5;
+        /// The token is a raw string and the prefix character is in uppercase.
+        const RAW_STRING_UPPERCASE = 1 << 6;
+
+        /// The token is a raw string i.e., prefixed with `r` or `R`
+        const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
+    }
+}
+
+impl StringFlags for TokenFlags {
+    fn quote_style(self) -> Quote {
+        if self.intersects(TokenFlags::DOUBLE_QUOTES) {
+            Quote::Double
+        } else {
+            Quote::Single
+        }
+    }
+
+    fn is_triple_quoted(self) -> bool {
+        self.intersects(TokenFlags::TRIPLE_QUOTED_STRING)
+    }
+
+    fn prefix(self) -> AnyStringPrefix {
+        if self.intersects(TokenFlags::F_STRING) {
+            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
+                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: false })
+            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
+                AnyStringPrefix::Format(FStringPrefix::Raw { uppercase_r: true })
+            } else {
+                AnyStringPrefix::Format(FStringPrefix::Regular)
+            }
+        } else if self.intersects(TokenFlags::BYTE_STRING) {
+            if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
+                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
+            } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
+                AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
+            } else {
+                AnyStringPrefix::Bytes(ByteStringPrefix::Regular)
+            }
+        } else if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false })
+        } else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true })
+        } else if self.intersects(TokenFlags::UNICODE_STRING) {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Unicode)
+        } else {
+            AnyStringPrefix::Regular(StringLiteralPrefix::Empty)
+        }
+    }
+}
+
+impl TokenFlags {
+    /// Returns `true` if the token is an f-string.
+    pub(crate) const fn is_f_string(self) -> bool {
+        self.intersects(TokenFlags::F_STRING)
+    }
+
+    /// Returns `true` if the token is a triple-quoted f-string.
+    pub(crate) fn is_triple_quoted_fstring(self) -> bool {
+        self.contains(TokenFlags::F_STRING | TokenFlags::TRIPLE_QUOTED_STRING)
+    }
+
+    /// Returns `true` if the token is a raw string.
+    pub(crate) const fn is_raw_string(self) -> bool {
+        self.intersects(TokenFlags::RAW_STRING)
+    }
+
+    /// Converts this type to [`AnyStringFlags`], setting the equivalent flags.
+    pub(crate) fn as_any_string_flags(self) -> AnyStringFlags {
+        AnyStringFlags::new(self.prefix(), self.quote_style(), self.is_triple_quoted())
+    }
+}
+
+#[derive(Clone, Debug, Default)]
+pub(crate) enum TokenValue {
+    #[default]
+    None,
+    /// Token value for a name, commonly known as an identifier.
+    ///
+    /// Unicode names are NFKC-normalized by the lexer,
+    /// matching [the behaviour of Python's lexer](https://docs.python.org/3/reference/lexical_analysis.html#identifiers)
+    Name(Box<str>),
+    /// Token value for an integer.
+    Int(Int),
+    /// Token value for a floating point number.
+    Float(f64),
+    /// Token value for a complex number.
+    Complex {
+        /// The real part of the complex number.
+        real: f64,
+        /// The imaginary part of the complex number.
+        imag: f64,
+    },
+    /// Token value for a string.
+    String(Box<str>),
+    /// Token value that includes the portion of text inside the f-string that's not
+    /// part of the expression part and isn't an opening or closing brace.
+    FStringMiddle(Box<str>),
+    /// Token value for IPython escape commands. These are recognized by the lexer
+    /// only when the mode is [`Mode::Ipython`].
+    IpyEscapeCommand {
+        /// The magic command value.
+        value: Box<str>,
+        /// The kind of magic command.
+        kind: IpyEscapeKind,
+    },
 }
--- a/crates/ruff_python_parser/src/token_source.rs
+++ b/crates/ruff_python_parser/src/token_source.rs
@ -1,7 +1,9 @@
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue};
-use crate::{Mode, TokenKind};
+use crate::error::LexicalError;
+use crate::lexer::{Lexer, LexerCheckpoint};
+use crate::token::{Token, TokenFlags, TokenKind, TokenValue};
+use crate::Mode;

 /// Token source for the parser that skips over any trivia tokens.
 #[derive(Debug)]