Simplify formatting of strings by using flags from the AST nodes (#10489)

Alex Waygood 2024-03-20 16:16:54 +00:00 committed by GitHub
parent fc792d1d2e
commit 7caf0d064a
GPG key ID: B5690EEEBB952194
22 changed files with 725 additions and 809 deletions
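
In outline: every call site that previously built the parser-local StringKind / StringPrefix (defined in the string_token_flags module deleted further down) now constructs ruff_python_ast::AnyStringKind / AnyStringPrefix directly, and the quote style is set explicitly via Quote rather than through a with_double_quotes() toggle. A hedged before/after sketch of the pattern repeated throughout the hunks below (string_kind_for is an illustrative helper, not a function from this commit):

    use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix};

    // Before (parser-local flags):
    //     let mut kind = StringKind::from_prefix(prefix);
    //     if quote == '"' { kind = kind.with_double_quotes(); }
    //
    // After (AST-crate flags, as in lex_string / lex_fstring_start below):
    fn string_kind_for(prefix: AnyStringPrefix, quote: char, triple_quoted: bool) -> AnyStringKind {
        let mut kind = AnyStringKind::default()
            .with_prefix(prefix)
            .with_quote_style(if quote == '"' { Quote::Double } else { Quote::Single });
        if triple_quoted {
            kind = kind.with_triple_quotes();
        }
        kind
    }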

View file

@ -34,19 +34,15 @@ use std::{char, cmp::Ordering, str::FromStr};
use unicode_ident::{is_xid_continue, is_xid_start};
use unicode_normalization::UnicodeNormalization;
use ruff_python_ast::{FStringPrefix, Int, IpyEscapeKind};
use ruff_python_ast::{
str::Quote, AnyStringKind, AnyStringPrefix, FStringPrefix, Int, IpyEscapeKind,
};
use ruff_text_size::{TextLen, TextRange, TextSize};
use crate::lexer::cursor::{Cursor, EOF_CHAR};
use crate::lexer::fstring::{FStringContext, FStrings};
use crate::lexer::indentation::{Indentation, Indentations};
use crate::{
soft_keywords::SoftKeywordTransformer,
string::FStringErrorType,
string_token_flags::{StringKind, StringPrefix},
token::Tok,
Mode,
};
use crate::{soft_keywords::SoftKeywordTransformer, string::FStringErrorType, token::Tok, Mode};
mod cursor;
mod fstring;
@ -188,14 +184,14 @@ impl<'source> Lexer<'source> {
return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true }));
}
(_, quote @ ('\'' | '"')) => {
if let Ok(prefix) = StringPrefix::try_from(first) {
if let Ok(prefix) = AnyStringPrefix::try_from(first) {
self.cursor.bump();
return self.lex_string(prefix, quote);
}
}
(_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
self.cursor.bump();
if let Ok(prefix) = StringPrefix::try_from([first, second]) {
if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) {
let quote = self.cursor.bump().unwrap();
return self.lex_string(prefix, quote);
}
@ -560,11 +556,14 @@ impl<'source> Lexer<'source> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut kind = StringKind::from_prefix(StringPrefix::Format(prefix));
let mut kind = AnyStringKind::default()
.with_prefix(AnyStringPrefix::Format(prefix))
.with_quote_style(if quote == '"' {
Quote::Double
} else {
Quote::Single
});
if quote == '"' {
kind = kind.with_double_quotes();
}
if self.cursor.eat_char2(quote, quote) {
kind = kind.with_triple_quotes();
}
@ -708,15 +707,17 @@ impl<'source> Lexer<'source> {
}
/// Lex a string literal.
fn lex_string(&mut self, prefix: StringPrefix, quote: char) -> Result<Tok, LexicalError> {
fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> Result<Tok, LexicalError> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut kind = StringKind::from_prefix(prefix);
if quote == '"' {
kind = kind.with_double_quotes();
}
let mut kind = AnyStringKind::default()
.with_prefix(prefix)
.with_quote_style(if quote == '"' {
Quote::Double
} else {
Quote::Single
});
// If the next two characters are also the quote character, then we have a triple-quoted
// string; consume those two characters and ensure that we require a triple-quote to close
@ -1082,7 +1083,7 @@ impl<'source> Lexer<'source> {
c if is_ascii_identifier_start(c) => self.lex_identifier(c)?,
'0'..='9' => self.lex_number(c)?,
'#' => return Ok((self.lex_comment(), self.token_range())),
'\'' | '"' => self.lex_string(StringPrefix::default(), c)?,
'\'' | '"' => self.lex_string(AnyStringPrefix::default(), c)?,
'=' => {
if self.cursor.eat_char('=') {
Tok::EqEqual
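
The prefix dispatch above relies on AnyStringPrefix's TryFrom impls for a single character and for a pair of characters, mirroring the conversions on the deleted StringPrefix type shown later in this diff. A small usage sketch (the expected variants follow the deleted TryFrom impls and are assumed to be unchanged in the AST crate):

    use ruff_python_ast::{AnyStringPrefix, ByteStringPrefix, FStringPrefix};

    fn main() {
        // Single-character prefixes: r/R, u/U, b/B, f/F.
        assert!(matches!(
            AnyStringPrefix::try_from('f'),
            Ok(AnyStringPrefix::Format(FStringPrefix::Regular))
        ));
        // Two-character prefixes pair r/R with b/B or f/F, in either order.
        assert!(matches!(
            AnyStringPrefix::try_from(['r', 'b']),
            Ok(AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }))
        ));
        // Any other character (or pair) is rejected.
        assert!(AnyStringPrefix::try_from('x').is_err());
    }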

View file

@ -1,9 +1,9 @@
use crate::string_token_flags::StringKind;
use ruff_python_ast::AnyStringKind;
/// The context representing the current f-string that the lexer is in.
#[derive(Debug)]
pub(crate) struct FStringContext {
kind: StringKind,
kind: AnyStringKind,
/// The level of nesting for the lexer when it entered the current f-string.
/// The nesting level includes all kinds of parentheses i.e., round, square,
@ -17,7 +17,7 @@ pub(crate) struct FStringContext {
}
impl FStringContext {
pub(crate) const fn new(kind: StringKind, nesting: u32) -> Self {
pub(crate) const fn new(kind: AnyStringKind, nesting: u32) -> Self {
debug_assert!(kind.is_f_string());
Self {
kind,
@ -26,7 +26,7 @@ impl FStringContext {
}
}
pub(crate) const fn kind(&self) -> StringKind {
pub(crate) const fn kind(&self) -> AnyStringKind {
debug_assert!(self.kind.is_f_string());
self.kind
}
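
FStringContext::new and kind() both debug-assert that the stored flags describe an f-string, an invariant the lexer upholds by building the kind from an AnyStringPrefix::Format prefix. A minimal sketch of a kind that satisfies the assertion, using only the builder calls visible in this diff:

    use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix, FStringPrefix};

    fn main() {
        let kind = AnyStringKind::default()
            .with_prefix(AnyStringPrefix::Format(FStringPrefix::Regular))
            .with_quote_style(Quote::Double);
        assert!(kind.is_f_string());
    }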

View file

@ -115,7 +115,6 @@ pub use parser::{
};
use ruff_python_ast::{Mod, PySourceType, Suite};
pub use string::FStringErrorType;
pub use string_token_flags::StringKind;
pub use token::{Tok, TokenKind};
use crate::lexer::LexResult;
@ -128,7 +127,6 @@ pub mod lexer;
mod parser;
mod soft_keywords;
mod string;
mod string_token_flags;
mod token;
mod token_source;
pub mod typing;

View file

@ -4,7 +4,7 @@
// See also: https://greentreesnakes.readthedocs.io/en/latest/nodes.html#keyword
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
use crate::{
FStringErrorType,
Mode,
@ -12,7 +12,6 @@ use crate::{
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
string_token_flags::StringKind,
token,
invalid,
};
@ -1983,7 +1982,7 @@ extern {
Dedent => token::Tok::Dedent,
StartModule => token::Tok::StartModule,
StartExpression => token::Tok::StartExpression,
fstring_start => token::Tok::FStringStart(<StringKind>),
fstring_start => token::Tok::FStringStart(<AnyStringKind>),
FStringEnd => token::Tok::FStringEnd,
"!" => token::Tok::Exclamation,
"?" => token::Tok::Question,
@ -2076,11 +2075,11 @@ extern {
complex => token::Tok::Complex { real: <f64>, imag: <f64> },
string => token::Tok::String {
value: <Box<str>>,
kind: <StringKind>,
kind: <AnyStringKind>,
},
fstring_middle => token::Tok::FStringMiddle {
value: <Box<str>>,
kind: <StringKind>,
kind: <AnyStringKind>,
},
name => token::Tok::Name { name: <Box<str>> },
ipy_escape_command => token::Tok::IpyEscapeCommand {

View file

@ -1,7 +1,7 @@
// auto-generated: "lalrpop 0.20.0"
// sha3: c98876ae871e13c1a0cabf962138ded61584185a0c3144b626dac60f707ea396
// sha3: 4ca26eae1233cf922ef88887715de0a4ca45076324249a20b87f095e9638165d
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
use crate::{
FStringErrorType,
Mode,
@ -9,7 +9,6 @@ use crate::{
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
string_token_flags::StringKind,
token,
invalid,
};
@ -26,7 +25,7 @@ extern crate alloc;
mod __parse__Top {
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
use crate::{
FStringErrorType,
Mode,
@ -34,7 +33,6 @@ mod __parse__Top {
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
string_token_flags::StringKind,
token,
invalid,
};
@ -52,8 +50,8 @@ mod __parse__Top {
Variant0(token::Tok),
Variant1((f64, f64)),
Variant2(f64),
Variant3((Box<str>, StringKind)),
Variant4(StringKind),
Variant3((Box<str>, AnyStringKind)),
Variant4(AnyStringKind),
Variant5(Int),
Variant6((IpyEscapeKind, Box<str>)),
Variant7(Box<str>),
@ -151,7 +149,7 @@ mod __parse__Top {
Variant99(ast::TypeParams),
Variant100(core::option::Option<ast::TypeParams>),
Variant101(ast::UnaryOp),
Variant102(core::option::Option<(Box<str>, StringKind)>),
Variant102(core::option::Option<(Box<str>, AnyStringKind)>),
}
const __ACTION: &[i16] = &[
// State 0
@ -18322,7 +18320,7 @@ mod __parse__Top {
fn __pop_Variant3<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, (Box<str>, StringKind), TextSize)
) -> (TextSize, (Box<str>, AnyStringKind), TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant3(__v), __r)) => (__l, __v, __r),
@ -18479,6 +18477,16 @@ mod __parse__Top {
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant4<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, AnyStringKind, TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant7<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
@ -18509,16 +18517,6 @@ mod __parse__Top {
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant4<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, StringKind, TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant67<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
@ -19102,7 +19100,7 @@ mod __parse__Top {
fn __pop_Variant102<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize)
) -> (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant102(__v), __r)) => (__l, __v, __r),
@ -35724,7 +35722,7 @@ fn __action185<
(_, parameters, _): (TextSize, core::option::Option<ast::Parameters>, TextSize),
(_, end_location_args, _): (TextSize, TextSize, TextSize),
(_, _, _): (TextSize, token::Tok, TextSize),
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
(_, body, _): (TextSize, crate::parser::ParenthesizedExpr, TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
@ -36179,7 +36177,7 @@ fn __action218<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, string, _): (TextSize, (Box<str>, StringKind), TextSize),
(_, string, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -36196,7 +36194,7 @@ fn __action219<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, start, _): (TextSize, StringKind, TextSize),
(_, start, _): (TextSize, AnyStringKind, TextSize),
(_, elements, _): (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
(_, _, _): (TextSize, token::Tok, TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
@ -36230,7 +36228,7 @@ fn __action221<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, fstring_middle, _): (TextSize, (Box<str>, StringKind), TextSize),
(_, fstring_middle, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -37185,8 +37183,8 @@ fn __action282<
>(
source_code: &str,
mode: Mode,
(_, __0, _): (TextSize, (Box<str>, StringKind), TextSize),
) -> core::option::Option<(Box<str>, StringKind)>
(_, __0, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
) -> core::option::Option<(Box<str>, AnyStringKind)>
{
Some(__0)
}
@ -37199,7 +37197,7 @@ fn __action283<
mode: Mode,
__lookbehind: &TextSize,
__lookahead: &TextSize,
) -> core::option::Option<(Box<str>, StringKind)>
) -> core::option::Option<(Box<str>, AnyStringKind)>
{
None
}
@ -47957,7 +47955,7 @@ fn __action791<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, TextSize, TextSize),
@ -48017,7 +48015,7 @@ fn __action793<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
__1: (TextSize, TextSize, TextSize),
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -49121,7 +49119,7 @@ fn __action828<
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
__2: (TextSize, TextSize, TextSize),
__3: (TextSize, token::Tok, TextSize),
__4: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__4: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__5: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
__6: (TextSize, TextSize, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
@ -52139,7 +52137,7 @@ fn __action924<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
__1: (TextSize, TextSize, TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -63911,7 +63909,7 @@ fn __action1304<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
__2: (TextSize, token::Tok, TextSize),
) -> StringType
@ -63967,7 +63965,7 @@ fn __action1306<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
let __start0 = __0.2;
@ -64870,7 +64868,7 @@ fn __action1338<
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -69379,7 +69377,7 @@ fn __action1485<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
let __start0 = __0.2;
@ -72279,7 +72277,7 @@ fn __action1578<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, token::Tok, TextSize),
) -> StringType
{
@ -72307,7 +72305,7 @@ fn __action1579<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
__2: (TextSize, token::Tok, TextSize),
) -> StringType
@ -76896,7 +76894,7 @@ fn __action1716<
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, ast::Parameters, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -76927,7 +76925,7 @@ fn __action1717<
mode: Mode,
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, token::Tok, TextSize),
__2: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__2: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -78832,7 +78830,7 @@ fn __action1774<
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, ast::Parameters, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, (Box<str>, StringKind), TextSize),
__3: (TextSize, (Box<str>, AnyStringKind), TextSize),
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -78895,7 +78893,7 @@ fn __action1776<
mode: Mode,
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, token::Tok, TextSize),
__2: (TextSize, (Box<str>, StringKind), TextSize),
__2: (TextSize, (Box<str>, AnyStringKind), TextSize),
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{

View file

@ -2,11 +2,10 @@
use bstr::ByteSlice;
use ruff_python_ast::{self as ast, Expr};
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::lexer::{LexicalError, LexicalErrorType};
use crate::string_token_flags::StringKind;
use crate::token::Tok;
pub(crate) enum StringType {
@ -43,13 +42,13 @@ enum EscapedChar {
struct StringParser {
source: Box<str>,
cursor: usize,
kind: StringKind,
kind: AnyStringKind,
offset: TextSize,
range: TextRange,
}
impl StringParser {
fn new(source: Box<str>, kind: StringKind, offset: TextSize, range: TextRange) -> Self {
fn new(source: Box<str>, kind: AnyStringKind, offset: TextSize, range: TextRange) -> Self {
Self {
source,
cursor: 0,
@ -425,7 +424,7 @@ impl StringParser {
pub(crate) fn parse_string_literal(
source: Box<str>,
kind: StringKind,
kind: AnyStringKind,
range: TextRange,
) -> Result<StringType, LexicalError> {
StringParser::new(source, kind, range.start() + kind.opener_len(), range).parse()
@ -433,7 +432,7 @@ pub(crate) fn parse_string_literal(
pub(crate) fn parse_fstring_literal_element(
source: Box<str>,
kind: StringKind,
kind: AnyStringKind,
range: TextRange,
) -> Result<ast::FStringElement, LexicalError> {
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()
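
parse_string_literal starts the parser at range.start() + kind.opener_len(), i.e. past the prefix and the opening quote(s), so only the string body is scanned. A worked sketch of that offset, assuming AnyStringKind::opener_len keeps the prefix-plus-quotes semantics documented on the deleted StringKind in the next file:

    use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix, ByteStringPrefix};
    use ruff_text_size::TextSize;

    fn main() {
        // For a token like Rb"""...""", the opener is the two-character prefix
        // plus the three opening quotes, so parsing would start 5 bytes in.
        let kind = AnyStringKind::default()
            .with_prefix(AnyStringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }))
            .with_quote_style(Quote::Double)
            .with_triple_quotes();
        assert_eq!(kind.opener_len(), TextSize::new(5));
    }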

View file

@ -1,395 +0,0 @@
use std::fmt;
use bitflags::bitflags;
use ruff_python_ast::{str::Quote, ByteStringPrefix, FStringPrefix, StringLiteralPrefix};
use ruff_text_size::{TextLen, TextSize};
bitflags! {
/// Flags that can be queried to obtain information
/// regarding the prefixes and quotes used for a string literal.
///
/// Note that not all of these flags can be validly combined -- e.g.,
/// it is invalid to combine the `U_PREFIX` flag with any other
/// of the `*_PREFIX` flags. As such, the recommended way to set the
/// prefix flags is by calling the `as_flags()` method on the
/// `StringPrefix` enum.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct StringFlags: u8 {
/// The string uses double quotes (`"`).
/// If this flag is not set, the string uses single quotes (`'`).
const DOUBLE = 1 << 0;
/// The string is triple-quoted:
/// it begins and ends with three consecutive quote characters.
const TRIPLE_QUOTED = 1 << 1;
/// The string has a `u` or `U` prefix.
/// While this prefix is a no-op at runtime,
/// strings with this prefix can have no other prefixes set.
const U_PREFIX = 1 << 2;
/// The string has a `b` or `B` prefix.
/// This means that the string is a sequence of `int`s at runtime,
/// rather than a sequence of `str`s.
/// Strings with this flag can also be raw strings,
/// but can have no other prefixes.
const B_PREFIX = 1 << 3;
/// The string has a `f` or `F` prefix, meaning it is an f-string.
/// F-strings can also be raw strings,
/// but can have no other prefixes.
const F_PREFIX = 1 << 4;
/// The string has an `r` prefix, meaning it is a raw string.
/// F-strings and byte-strings can be raw,
/// as can strings with no other prefixes.
/// U-strings cannot be raw.
const R_PREFIX_LOWER = 1 << 5;
/// The string has an `R` prefix, meaning it is a raw string.
/// The casing of the `r`/`R` has no semantic significance at runtime;
/// see https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
/// for why we track the casing of the `r` prefix,
/// but not for any other prefix
const R_PREFIX_UPPER = 1 << 6;
}
}
/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `StringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum StringPrefix {
/// Prefixes that indicate the string is a bytestring
Bytes(ByteStringPrefix),
/// Prefixes that indicate the string is an f-string
Format(FStringPrefix),
/// All other prefixes
Regular(StringLiteralPrefix),
}
impl TryFrom<char> for StringPrefix {
type Error = String;
fn try_from(value: char) -> Result<Self, String> {
let result = match value {
'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }),
'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }),
'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode),
'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular),
'f' | 'F' => Self::Format(FStringPrefix::Regular),
_ => return Err(format!("Unexpected prefix '{value}'")),
};
Ok(result)
}
}
impl TryFrom<[char; 2]> for StringPrefix {
type Error = String;
fn try_from(value: [char; 2]) -> Result<Self, String> {
let result = match value {
['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
Self::Format(FStringPrefix::Raw { uppercase_r: false })
}
['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
Self::Format(FStringPrefix::Raw { uppercase_r: true })
}
['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
}
['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
}
_ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
};
Ok(result)
}
}
impl StringPrefix {
const fn as_flags(self) -> StringFlags {
match self {
// regular strings
Self::Regular(StringLiteralPrefix::Empty) => StringFlags::empty(),
Self::Regular(StringLiteralPrefix::Unicode) => StringFlags::U_PREFIX,
Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
StringFlags::R_PREFIX_LOWER
}
Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
StringFlags::R_PREFIX_UPPER
}
// bytestrings
Self::Bytes(ByteStringPrefix::Regular) => StringFlags::B_PREFIX,
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_LOWER)
}
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_UPPER)
}
// f-strings
Self::Format(FStringPrefix::Regular) => StringFlags::F_PREFIX,
Self::Format(FStringPrefix::Raw { uppercase_r: false }) => {
StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_LOWER)
}
Self::Format(FStringPrefix::Raw { uppercase_r: true }) => {
StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_UPPER)
}
}
}
const fn from_kind(kind: StringKind) -> Self {
let StringKind(flags) = kind;
// f-strings
if flags.contains(StringFlags::F_PREFIX) {
if flags.contains(StringFlags::R_PREFIX_LOWER) {
return Self::Format(FStringPrefix::Raw { uppercase_r: false });
}
if flags.contains(StringFlags::R_PREFIX_UPPER) {
return Self::Format(FStringPrefix::Raw { uppercase_r: true });
}
return Self::Format(FStringPrefix::Regular);
}
// bytestrings
if flags.contains(StringFlags::B_PREFIX) {
if flags.contains(StringFlags::R_PREFIX_LOWER) {
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
}
if flags.contains(StringFlags::R_PREFIX_UPPER) {
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
}
return Self::Bytes(ByteStringPrefix::Regular);
}
// all other strings
if flags.contains(StringFlags::R_PREFIX_LOWER) {
return Self::Regular(StringLiteralPrefix::Raw { uppercase: false });
}
if flags.contains(StringFlags::R_PREFIX_UPPER) {
return Self::Regular(StringLiteralPrefix::Raw { uppercase: true });
}
if flags.contains(StringFlags::U_PREFIX) {
return Self::Regular(StringLiteralPrefix::Unicode);
}
Self::Regular(StringLiteralPrefix::Empty)
}
const fn as_str(self) -> &'static str {
match self {
Self::Regular(regular_prefix) => regular_prefix.as_str(),
Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(),
Self::Format(fstring_prefix) => fstring_prefix.as_str(),
}
}
}
impl fmt::Display for StringPrefix {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}
impl Default for StringPrefix {
fn default() -> Self {
Self::Regular(StringLiteralPrefix::Empty)
}
}
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringKind(StringFlags);
impl StringKind {
pub(crate) const fn from_prefix(prefix: StringPrefix) -> Self {
Self(prefix.as_flags())
}
pub const fn prefix(self) -> StringPrefix {
StringPrefix::from_kind(self)
}
/// Does the string have a `u` or `U` prefix?
pub const fn is_u_string(self) -> bool {
self.0.contains(StringFlags::U_PREFIX)
}
/// Does the string have an `r` or `R` prefix?
pub const fn is_raw_string(self) -> bool {
self.0
.intersects(StringFlags::R_PREFIX_LOWER.union(StringFlags::R_PREFIX_UPPER))
}
/// Does the string have an `f` or `F` prefix?
pub const fn is_f_string(self) -> bool {
self.0.contains(StringFlags::F_PREFIX)
}
/// Does the string have a `b` or `B` prefix?
pub const fn is_byte_string(self) -> bool {
self.0.contains(StringFlags::B_PREFIX)
}
/// Does the string use single or double quotes in its opener and closer?
pub const fn quote_style(self) -> Quote {
if self.0.contains(StringFlags::DOUBLE) {
Quote::Double
} else {
Quote::Single
}
}
/// Is the string triple-quoted, i.e.,
/// does it begin and end with three consecutive quote characters?
pub const fn is_triple_quoted(self) -> bool {
self.0.contains(StringFlags::TRIPLE_QUOTED)
}
/// A `str` representation of the quotes used to start and close.
/// This does not include any prefixes the string has in its opener.
pub const fn quote_str(self) -> &'static str {
if self.is_triple_quoted() {
match self.quote_style() {
Quote::Single => "'''",
Quote::Double => r#"""""#,
}
} else {
match self.quote_style() {
Quote::Single => "'",
Quote::Double => "\"",
}
}
}
/// The length of the prefixes used (if any) in the string's opener.
pub fn prefix_len(self) -> TextSize {
self.prefix().as_str().text_len()
}
/// The length of the quotes used to start and close the string.
/// This does not include the length of any prefixes the string has
/// in its opener.
pub const fn quote_len(self) -> TextSize {
if self.is_triple_quoted() {
TextSize::new(3)
} else {
TextSize::new(1)
}
}
/// The total length of the string's opener,
/// i.e., the length of the prefixes plus the length
/// of the quotes used to open the string.
pub fn opener_len(self) -> TextSize {
self.prefix_len() + self.quote_len()
}
/// The total length of the string's closer.
/// This is always equal to `self.quote_len()`,
/// but is provided here for symmetry with the `opener_len()` method.
pub const fn closer_len(self) -> TextSize {
self.quote_len()
}
pub fn format_string_contents(self, contents: &str) -> String {
format!(
"{}{}{}{}",
self.prefix(),
self.quote_str(),
contents,
self.quote_str()
)
}
#[must_use]
pub fn with_double_quotes(mut self) -> Self {
self.0 |= StringFlags::DOUBLE;
self
}
#[must_use]
pub fn with_triple_quotes(mut self) -> Self {
self.0 |= StringFlags::TRIPLE_QUOTED;
self
}
}
impl fmt::Debug for StringKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("StringKind")
.field("prefix", &self.prefix())
.field("triple_quoted", &self.is_triple_quoted())
.field("quote_style", &self.quote_style())
.finish()
}
}
impl From<StringKind> for ruff_python_ast::StringLiteralFlags {
fn from(value: StringKind) -> ruff_python_ast::StringLiteralFlags {
let mut new = ruff_python_ast::StringLiteralFlags::default();
if value.quote_style().is_double() {
new = new.with_double_quotes();
}
if value.is_triple_quoted() {
new = new.with_triple_quotes();
}
let StringPrefix::Regular(prefix) = value.prefix() else {
unreachable!(
"Should never attempt to convert {} into a regular string",
value.prefix()
)
};
new.with_prefix(prefix)
}
}
impl From<StringKind> for ruff_python_ast::BytesLiteralFlags {
fn from(value: StringKind) -> ruff_python_ast::BytesLiteralFlags {
let mut new = ruff_python_ast::BytesLiteralFlags::default();
if value.quote_style().is_double() {
new = new.with_double_quotes();
}
if value.is_triple_quoted() {
new = new.with_triple_quotes();
}
let StringPrefix::Bytes(bytestring_prefix) = value.prefix() else {
unreachable!(
"Should never attempt to convert {} into a bytestring",
value.prefix()
)
};
new.with_prefix(bytestring_prefix)
}
}
impl From<StringKind> for ruff_python_ast::FStringFlags {
fn from(value: StringKind) -> ruff_python_ast::FStringFlags {
let mut new = ruff_python_ast::FStringFlags::default();
if value.quote_style().is_double() {
new = new.with_double_quotes();
}
if value.is_triple_quoted() {
new = new.with_triple_quotes();
}
let StringPrefix::Format(fstring_prefix) = value.prefix() else {
unreachable!(
"Should never attempt to convert {} into an f-string",
value.prefix()
)
};
new.with_prefix(fstring_prefix)
}
}
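
With this module removed, the same queries are answered by ruff_python_ast::AnyStringKind and AnyStringPrefix. Judging only from the call sites elsewhere in this diff, the replacement surface maps roughly as follows (a sketch inferred from the hunks above, not taken from the ruff_python_ast source):

    StringKind::from_prefix(prefix)         ->  AnyStringKind::default().with_prefix(prefix)
    kind.with_double_quotes()               ->  kind.with_quote_style(Quote::Double)
    (single quotes were the default)        ->  kind.with_quote_style(Quote::Single)
    kind.with_triple_quotes()               ->  kind.with_triple_quotes()
    StringPrefix::try_from(c) / ([c1, c2])  ->  AnyStringPrefix::try_from(c) / ([c1, c2])
    kind.is_f_string(), kind.opener_len()   ->  same methods on AnyStringKind

Whether the remaining helpers on the deleted type (quote_str, prefix_len, quote_len, closer_len, format_string_contents) carry over under the same names is not visible from this diff.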

View file

@ -4,10 +4,9 @@
//! loosely based on the token definitions found in the [CPython source].
//!
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h;
use crate::string_token_flags::StringKind;
use crate::Mode;
use ruff_python_ast::{Int, IpyEscapeKind};
use ruff_python_ast::{AnyStringKind, Int, IpyEscapeKind};
use std::fmt;
/// The set of tokens the Python source code can be tokenized in.
@ -44,11 +43,11 @@ pub enum Tok {
value: Box<str>,
/// Flags that can be queried to determine the quote style
/// and prefixes of the string
kind: StringKind,
kind: AnyStringKind,
},
/// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix
/// and the opening quote(s).
FStringStart(StringKind),
FStringStart(AnyStringKind),
/// Token value that includes the portion of text inside the f-string that's not
/// part of the expression part and isn't an opening or closing brace.
FStringMiddle {
@ -56,7 +55,7 @@ pub enum Tok {
value: Box<str>,
/// Flags that can be queried to determine the quote style
/// and prefixes of the string
kind: StringKind,
kind: AnyStringKind,
},
/// Token value for the end of an f-string. This includes the closing quote.
FStringEnd,