mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-26 05:14:17 +00:00
Implement template strings (#17851)
This PR implements template strings (t-strings) in the parser and formatter for Ruff. Minimal changes necessary to compile were made in other parts of the code (e.g. ty, the linter, etc.). These will be covered properly in follow-up PRs.
This commit is contained in:
parent
ad024f9a09
commit
9bbf4987e8
261 changed files with 18023 additions and 1802 deletions
|
@ -12,7 +12,7 @@ use bitflags::bitflags;
|
|||
use ruff_python_ast::name::Name;
|
||||
use ruff_python_ast::str::{Quote, TripleQuotes};
|
||||
use ruff_python_ast::str_prefix::{
|
||||
AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix,
|
||||
AnyStringPrefix, ByteStringPrefix, FStringPrefix, StringLiteralPrefix, TStringPrefix,
|
||||
};
|
||||
use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
@ -48,7 +48,7 @@ impl Token {
|
|||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If it isn't a string or any f-string tokens.
|
||||
/// If it isn't a string or any f/t-string tokens.
|
||||
pub fn is_triple_quoted_string(self) -> bool {
|
||||
self.unwrap_string_flags().is_triple_quoted()
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ impl Token {
|
|||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If it isn't a string or any f-string tokens.
|
||||
/// If it isn't a string or any f/t-string tokens.
|
||||
pub fn string_quote_style(self) -> Quote {
|
||||
self.unwrap_string_flags().quote_style()
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ impl Token {
|
|||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// If it isn't a string or any f-string tokens.
|
||||
/// If it isn't a string or any f/t-string tokens.
|
||||
pub fn unwrap_string_flags(self) -> AnyStringFlags {
|
||||
self.string_flags()
|
||||
.unwrap_or_else(|| panic!("token to be a string"))
|
||||
|
@ -81,7 +81,8 @@ impl Token {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this is any kind of string token.
|
||||
/// Returns `true` if this is any kind of string token - including
|
||||
/// tokens in t-strings (which do not have type `str`).
|
||||
const fn is_any_string(self) -> bool {
|
||||
matches!(
|
||||
self.kind,
|
||||
|
@ -89,6 +90,9 @@ impl Token {
|
|||
| TokenKind::FStringStart
|
||||
| TokenKind::FStringMiddle
|
||||
| TokenKind::FStringEnd
|
||||
| TokenKind::TStringStart
|
||||
| TokenKind::TStringMiddle
|
||||
| TokenKind::TStringEnd
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -140,6 +144,14 @@ pub enum TokenKind {
|
|||
FStringMiddle,
|
||||
/// Token kind for the end of an f-string. This includes the closing quote.
|
||||
FStringEnd,
|
||||
/// Token kind for the start of a t-string. This includes the `t`/`T`/`tr` prefix
|
||||
/// and the opening quote(s).
|
||||
TStringStart,
|
||||
/// Token kind that includes the portion of text inside the t-string that's not
|
||||
/// part of the interpolation part and isn't an opening or closing brace.
|
||||
TStringMiddle,
|
||||
/// Token kind for the end of a t-string. This includes the closing quote.
|
||||
TStringEnd,
|
||||
/// Token kind for a IPython escape command.
|
||||
IpyEscapeCommand,
|
||||
/// Token kind for a comment. These are filtered out of the token stream prior to parsing.
|
||||
|
@ -462,6 +474,11 @@ impl TokenKind {
|
|||
matches!(self, TokenKind::Plus | TokenKind::Minus)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn is_interpolated_string_end(self) -> bool {
|
||||
matches!(self, TokenKind::FStringEnd | TokenKind::TStringEnd)
|
||||
}
|
||||
|
||||
/// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary arithmetic
|
||||
/// operator, otherwise return [None].
|
||||
///
|
||||
|
@ -613,6 +630,9 @@ impl fmt::Display for TokenKind {
|
|||
TokenKind::FStringStart => "FStringStart",
|
||||
TokenKind::FStringMiddle => "FStringMiddle",
|
||||
TokenKind::FStringEnd => "FStringEnd",
|
||||
TokenKind::TStringStart => "TStringStart",
|
||||
TokenKind::TStringMiddle => "TStringMiddle",
|
||||
TokenKind::TStringEnd => "TStringEnd",
|
||||
TokenKind::IpyEscapeCommand => "IPython escape command",
|
||||
TokenKind::Comment => "comment",
|
||||
TokenKind::Question => "'?'",
|
||||
|
@ -722,10 +742,12 @@ bitflags! {
|
|||
const BYTE_STRING = 1 << 3;
|
||||
/// The token is an f-string i.e., prefixed with `f` or `F`
|
||||
const F_STRING = 1 << 4;
|
||||
/// The token is a t-string i.e., prefixed with `t` or `T`
|
||||
const T_STRING = 1 << 5;
|
||||
/// The token is a raw string and the prefix character is in lowercase.
|
||||
const RAW_STRING_LOWERCASE = 1 << 5;
|
||||
const RAW_STRING_LOWERCASE = 1 << 6;
|
||||
/// The token is a raw string and the prefix character is in uppercase.
|
||||
const RAW_STRING_UPPERCASE = 1 << 6;
|
||||
const RAW_STRING_UPPERCASE = 1 << 7;
|
||||
|
||||
/// The token is a raw string i.e., prefixed with `r` or `R`
|
||||
const RAW_STRING = Self::RAW_STRING_LOWERCASE.bits() | Self::RAW_STRING_UPPERCASE.bits();
|
||||
|
@ -758,6 +780,14 @@ impl StringFlags for TokenFlags {
|
|||
} else {
|
||||
AnyStringPrefix::Format(FStringPrefix::Regular)
|
||||
}
|
||||
} else if self.intersects(TokenFlags::T_STRING) {
|
||||
if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
||||
AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: false })
|
||||
} else if self.intersects(TokenFlags::RAW_STRING_UPPERCASE) {
|
||||
AnyStringPrefix::Template(TStringPrefix::Raw { uppercase_r: true })
|
||||
} else {
|
||||
AnyStringPrefix::Template(TStringPrefix::Regular)
|
||||
}
|
||||
} else if self.intersects(TokenFlags::BYTE_STRING) {
|
||||
if self.intersects(TokenFlags::RAW_STRING_LOWERCASE) {
|
||||
AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
|
||||
|
@ -784,9 +814,19 @@ impl TokenFlags {
|
|||
self.intersects(TokenFlags::F_STRING)
|
||||
}
|
||||
|
||||
/// Returns `true` if the token is a triple-quoted f-string.
|
||||
pub(crate) fn is_triple_quoted_fstring(self) -> bool {
|
||||
self.contains(TokenFlags::F_STRING | TokenFlags::TRIPLE_QUOTED_STRING)
|
||||
/// Returns `true` if the token is a t-string.
|
||||
pub(crate) const fn is_t_string(self) -> bool {
|
||||
self.intersects(TokenFlags::T_STRING)
|
||||
}
|
||||
|
||||
/// Returns `true` if the token is a t-string.
|
||||
pub(crate) const fn is_interpolated_string(self) -> bool {
|
||||
self.intersects(TokenFlags::T_STRING.union(TokenFlags::F_STRING))
|
||||
}
|
||||
|
||||
/// Returns `true` if the token is a triple-quoted t-string.
|
||||
pub(crate) fn is_triple_quoted_interpolated_string(self) -> bool {
|
||||
self.intersects(TokenFlags::TRIPLE_QUOTED_STRING) && self.is_interpolated_string()
|
||||
}
|
||||
|
||||
/// Returns `true` if the token is a raw string.
|
||||
|
@ -819,7 +859,7 @@ pub(crate) enum TokenValue {
|
|||
String(Box<str>),
|
||||
/// Token value that includes the portion of text inside the f-string that's not
|
||||
/// part of the expression part and isn't an opening or closing brace.
|
||||
FStringMiddle(Box<str>),
|
||||
InterpolatedStringMiddle(Box<str>),
|
||||
/// Token value for IPython escape commands. These are recognized by the lexer
|
||||
/// only when the mode is [`Mode::Ipython`].
|
||||
IpyEscapeCommand {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue