mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:56 +00:00
Simplify formatting of strings by using flags from the AST nodes (#10489)
This commit is contained in:
parent
fc792d1d2e
commit
7caf0d064a
22 changed files with 725 additions and 809 deletions
|
@ -34,19 +34,15 @@ use std::{char, cmp::Ordering, str::FromStr};
|
|||
use unicode_ident::{is_xid_continue, is_xid_start};
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
use ruff_python_ast::{FStringPrefix, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{
|
||||
str::Quote, AnyStringKind, AnyStringPrefix, FStringPrefix, Int, IpyEscapeKind,
|
||||
};
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::cursor::{Cursor, EOF_CHAR};
|
||||
use crate::lexer::fstring::{FStringContext, FStrings};
|
||||
use crate::lexer::indentation::{Indentation, Indentations};
|
||||
use crate::{
|
||||
soft_keywords::SoftKeywordTransformer,
|
||||
string::FStringErrorType,
|
||||
string_token_flags::{StringKind, StringPrefix},
|
||||
token::Tok,
|
||||
Mode,
|
||||
};
|
||||
use crate::{soft_keywords::SoftKeywordTransformer, string::FStringErrorType, token::Tok, Mode};
|
||||
|
||||
mod cursor;
|
||||
mod fstring;
|
||||
|
@ -188,14 +184,14 @@ impl<'source> Lexer<'source> {
|
|||
return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true }));
|
||||
}
|
||||
(_, quote @ ('\'' | '"')) => {
|
||||
if let Ok(prefix) = StringPrefix::try_from(first) {
|
||||
if let Ok(prefix) = AnyStringPrefix::try_from(first) {
|
||||
self.cursor.bump();
|
||||
return self.lex_string(prefix, quote);
|
||||
}
|
||||
}
|
||||
(_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
|
||||
self.cursor.bump();
|
||||
if let Ok(prefix) = StringPrefix::try_from([first, second]) {
|
||||
if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) {
|
||||
let quote = self.cursor.bump().unwrap();
|
||||
return self.lex_string(prefix, quote);
|
||||
}
|
||||
|
@ -560,11 +556,14 @@ impl<'source> Lexer<'source> {
|
|||
#[cfg(debug_assertions)]
|
||||
debug_assert_eq!(self.cursor.previous(), quote);
|
||||
|
||||
let mut kind = StringKind::from_prefix(StringPrefix::Format(prefix));
|
||||
let mut kind = AnyStringKind::default()
|
||||
.with_prefix(AnyStringPrefix::Format(prefix))
|
||||
.with_quote_style(if quote == '"' {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
});
|
||||
|
||||
if quote == '"' {
|
||||
kind = kind.with_double_quotes();
|
||||
}
|
||||
if self.cursor.eat_char2(quote, quote) {
|
||||
kind = kind.with_triple_quotes();
|
||||
}
|
||||
|
@ -708,15 +707,17 @@ impl<'source> Lexer<'source> {
|
|||
}
|
||||
|
||||
/// Lex a string literal.
|
||||
fn lex_string(&mut self, prefix: StringPrefix, quote: char) -> Result<Tok, LexicalError> {
|
||||
fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> Result<Tok, LexicalError> {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert_eq!(self.cursor.previous(), quote);
|
||||
|
||||
let mut kind = StringKind::from_prefix(prefix);
|
||||
|
||||
if quote == '"' {
|
||||
kind = kind.with_double_quotes();
|
||||
}
|
||||
let mut kind = AnyStringKind::default()
|
||||
.with_prefix(prefix)
|
||||
.with_quote_style(if quote == '"' {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
});
|
||||
|
||||
// If the next two characters are also the quote character, then we have a triple-quoted
|
||||
// string; consume those two characters and ensure that we require a triple-quote to close
|
||||
|
@ -1082,7 +1083,7 @@ impl<'source> Lexer<'source> {
|
|||
c if is_ascii_identifier_start(c) => self.lex_identifier(c)?,
|
||||
'0'..='9' => self.lex_number(c)?,
|
||||
'#' => return Ok((self.lex_comment(), self.token_range())),
|
||||
'\'' | '"' => self.lex_string(StringPrefix::default(), c)?,
|
||||
'\'' | '"' => self.lex_string(AnyStringPrefix::default(), c)?,
|
||||
'=' => {
|
||||
if self.cursor.eat_char('=') {
|
||||
Tok::EqEqual
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use crate::string_token_flags::StringKind;
|
||||
use ruff_python_ast::AnyStringKind;
|
||||
|
||||
/// The context representing the current f-string that the lexer is in.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct FStringContext {
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// The level of nesting for the lexer when it entered the current f-string.
|
||||
/// The nesting level includes all kinds of parentheses i.e., round, square,
|
||||
|
@ -17,7 +17,7 @@ pub(crate) struct FStringContext {
|
|||
}
|
||||
|
||||
impl FStringContext {
|
||||
pub(crate) const fn new(kind: StringKind, nesting: u32) -> Self {
|
||||
pub(crate) const fn new(kind: AnyStringKind, nesting: u32) -> Self {
|
||||
debug_assert!(kind.is_f_string());
|
||||
Self {
|
||||
kind,
|
||||
|
@ -26,7 +26,7 @@ impl FStringContext {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn kind(&self) -> StringKind {
|
||||
pub(crate) const fn kind(&self) -> AnyStringKind {
|
||||
debug_assert!(self.kind.is_f_string());
|
||||
self.kind
|
||||
}
|
||||
|
|
|
@ -115,7 +115,6 @@ pub use parser::{
|
|||
};
|
||||
use ruff_python_ast::{Mod, PySourceType, Suite};
|
||||
pub use string::FStringErrorType;
|
||||
pub use string_token_flags::StringKind;
|
||||
pub use token::{Tok, TokenKind};
|
||||
|
||||
use crate::lexer::LexResult;
|
||||
|
@ -128,7 +127,6 @@ pub mod lexer;
|
|||
mod parser;
|
||||
mod soft_keywords;
|
||||
mod string;
|
||||
mod string_token_flags;
|
||||
mod token;
|
||||
mod token_source;
|
||||
pub mod typing;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
// See also: https://greentreesnakes.readthedocs.io/en/latest/nodes.html#keyword
|
||||
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
|
||||
use crate::{
|
||||
FStringErrorType,
|
||||
Mode,
|
||||
|
@ -12,7 +12,6 @@ use crate::{
|
|||
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
|
||||
context::set_context,
|
||||
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
|
||||
string_token_flags::StringKind,
|
||||
token,
|
||||
invalid,
|
||||
};
|
||||
|
@ -1983,7 +1982,7 @@ extern {
|
|||
Dedent => token::Tok::Dedent,
|
||||
StartModule => token::Tok::StartModule,
|
||||
StartExpression => token::Tok::StartExpression,
|
||||
fstring_start => token::Tok::FStringStart(<StringKind>),
|
||||
fstring_start => token::Tok::FStringStart(<AnyStringKind>),
|
||||
FStringEnd => token::Tok::FStringEnd,
|
||||
"!" => token::Tok::Exclamation,
|
||||
"?" => token::Tok::Question,
|
||||
|
@ -2076,11 +2075,11 @@ extern {
|
|||
complex => token::Tok::Complex { real: <f64>, imag: <f64> },
|
||||
string => token::Tok::String {
|
||||
value: <Box<str>>,
|
||||
kind: <StringKind>,
|
||||
kind: <AnyStringKind>,
|
||||
},
|
||||
fstring_middle => token::Tok::FStringMiddle {
|
||||
value: <Box<str>>,
|
||||
kind: <StringKind>,
|
||||
kind: <AnyStringKind>,
|
||||
},
|
||||
name => token::Tok::Name { name: <Box<str>> },
|
||||
ipy_escape_command => token::Tok::IpyEscapeCommand {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// auto-generated: "lalrpop 0.20.0"
|
||||
// sha3: c98876ae871e13c1a0cabf962138ded61584185a0c3144b626dac60f707ea396
|
||||
// sha3: 4ca26eae1233cf922ef88887715de0a4ca45076324249a20b87f095e9638165d
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
|
||||
use crate::{
|
||||
FStringErrorType,
|
||||
Mode,
|
||||
|
@ -9,7 +9,6 @@ use crate::{
|
|||
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
|
||||
context::set_context,
|
||||
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
|
||||
string_token_flags::StringKind,
|
||||
token,
|
||||
invalid,
|
||||
};
|
||||
|
@ -26,7 +25,7 @@ extern crate alloc;
|
|||
mod __parse__Top {
|
||||
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
|
||||
use crate::{
|
||||
FStringErrorType,
|
||||
Mode,
|
||||
|
@ -34,7 +33,6 @@ mod __parse__Top {
|
|||
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
|
||||
context::set_context,
|
||||
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
|
||||
string_token_flags::StringKind,
|
||||
token,
|
||||
invalid,
|
||||
};
|
||||
|
@ -52,8 +50,8 @@ mod __parse__Top {
|
|||
Variant0(token::Tok),
|
||||
Variant1((f64, f64)),
|
||||
Variant2(f64),
|
||||
Variant3((Box<str>, StringKind)),
|
||||
Variant4(StringKind),
|
||||
Variant3((Box<str>, AnyStringKind)),
|
||||
Variant4(AnyStringKind),
|
||||
Variant5(Int),
|
||||
Variant6((IpyEscapeKind, Box<str>)),
|
||||
Variant7(Box<str>),
|
||||
|
@ -151,7 +149,7 @@ mod __parse__Top {
|
|||
Variant99(ast::TypeParams),
|
||||
Variant100(core::option::Option<ast::TypeParams>),
|
||||
Variant101(ast::UnaryOp),
|
||||
Variant102(core::option::Option<(Box<str>, StringKind)>),
|
||||
Variant102(core::option::Option<(Box<str>, AnyStringKind)>),
|
||||
}
|
||||
const __ACTION: &[i16] = &[
|
||||
// State 0
|
||||
|
@ -18322,7 +18320,7 @@ mod __parse__Top {
|
|||
fn __pop_Variant3<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, (Box<str>, StringKind), TextSize)
|
||||
) -> (TextSize, (Box<str>, AnyStringKind), TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant3(__v), __r)) => (__l, __v, __r),
|
||||
|
@ -18479,6 +18477,16 @@ mod __parse__Top {
|
|||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant4<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, AnyStringKind, TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
|
||||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant7<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
|
@ -18509,16 +18517,6 @@ mod __parse__Top {
|
|||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant4<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, StringKind, TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
|
||||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant67<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
|
@ -19102,7 +19100,7 @@ mod __parse__Top {
|
|||
fn __pop_Variant102<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize)
|
||||
) -> (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant102(__v), __r)) => (__l, __v, __r),
|
||||
|
@ -35724,7 +35722,7 @@ fn __action185<
|
|||
(_, parameters, _): (TextSize, core::option::Option<ast::Parameters>, TextSize),
|
||||
(_, end_location_args, _): (TextSize, TextSize, TextSize),
|
||||
(_, _, _): (TextSize, token::Tok, TextSize),
|
||||
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
(_, body, _): (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
|
@ -36179,7 +36177,7 @@ fn __action218<
|
|||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, location, _): (TextSize, TextSize, TextSize),
|
||||
(_, string, _): (TextSize, (Box<str>, StringKind), TextSize),
|
||||
(_, string, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -36196,7 +36194,7 @@ fn __action219<
|
|||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, location, _): (TextSize, TextSize, TextSize),
|
||||
(_, start, _): (TextSize, StringKind, TextSize),
|
||||
(_, start, _): (TextSize, AnyStringKind, TextSize),
|
||||
(_, elements, _): (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
(_, _, _): (TextSize, token::Tok, TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
|
@ -36230,7 +36228,7 @@ fn __action221<
|
|||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, location, _): (TextSize, TextSize, TextSize),
|
||||
(_, fstring_middle, _): (TextSize, (Box<str>, StringKind), TextSize),
|
||||
(_, fstring_middle, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -37185,8 +37183,8 @@ fn __action282<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, __0, _): (TextSize, (Box<str>, StringKind), TextSize),
|
||||
) -> core::option::Option<(Box<str>, StringKind)>
|
||||
(_, __0, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
) -> core::option::Option<(Box<str>, AnyStringKind)>
|
||||
{
|
||||
Some(__0)
|
||||
}
|
||||
|
@ -37199,7 +37197,7 @@ fn __action283<
|
|||
mode: Mode,
|
||||
__lookbehind: &TextSize,
|
||||
__lookahead: &TextSize,
|
||||
) -> core::option::Option<(Box<str>, StringKind)>
|
||||
) -> core::option::Option<(Box<str>, AnyStringKind)>
|
||||
{
|
||||
None
|
||||
}
|
||||
|
@ -47957,7 +47955,7 @@ fn __action791<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, TextSize, TextSize),
|
||||
|
@ -48017,7 +48015,7 @@ fn __action793<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__1: (TextSize, TextSize, TextSize),
|
||||
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -49121,7 +49119,7 @@ fn __action828<
|
|||
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
|
||||
__2: (TextSize, TextSize, TextSize),
|
||||
__3: (TextSize, token::Tok, TextSize),
|
||||
__4: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__4: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__5: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
__6: (TextSize, TextSize, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
|
@ -52139,7 +52137,7 @@ fn __action924<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__1: (TextSize, TextSize, TextSize),
|
||||
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -63911,7 +63909,7 @@ fn __action1304<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
) -> StringType
|
||||
|
@ -63967,7 +63965,7 @@ fn __action1306<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
let __start0 = __0.2;
|
||||
|
@ -64870,7 +64868,7 @@ fn __action1338<
|
|||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -69379,7 +69377,7 @@ fn __action1485<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
let __start0 = __0.2;
|
||||
|
@ -72279,7 +72277,7 @@ fn __action1578<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, token::Tok, TextSize),
|
||||
) -> StringType
|
||||
{
|
||||
|
@ -72307,7 +72305,7 @@ fn __action1579<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
) -> StringType
|
||||
|
@ -76896,7 +76894,7 @@ fn __action1716<
|
|||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, ast::Parameters, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -76927,7 +76925,7 @@ fn __action1717<
|
|||
mode: Mode,
|
||||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, token::Tok, TextSize),
|
||||
__2: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__2: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -78832,7 +78830,7 @@ fn __action1774<
|
|||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, ast::Parameters, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__3: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -78895,7 +78893,7 @@ fn __action1776<
|
|||
mode: Mode,
|
||||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, token::Tok, TextSize),
|
||||
__2: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__2: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
|
|
@ -2,11 +2,10 @@
|
|||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use ruff_python_ast::{self as ast, Expr};
|
||||
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
||||
use crate::string_token_flags::StringKind;
|
||||
use crate::token::Tok;
|
||||
|
||||
pub(crate) enum StringType {
|
||||
|
@ -43,13 +42,13 @@ enum EscapedChar {
|
|||
struct StringParser {
|
||||
source: Box<str>,
|
||||
cursor: usize,
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
offset: TextSize,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl StringParser {
|
||||
fn new(source: Box<str>, kind: StringKind, offset: TextSize, range: TextRange) -> Self {
|
||||
fn new(source: Box<str>, kind: AnyStringKind, offset: TextSize, range: TextRange) -> Self {
|
||||
Self {
|
||||
source,
|
||||
cursor: 0,
|
||||
|
@ -425,7 +424,7 @@ impl StringParser {
|
|||
|
||||
pub(crate) fn parse_string_literal(
|
||||
source: Box<str>,
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
range: TextRange,
|
||||
) -> Result<StringType, LexicalError> {
|
||||
StringParser::new(source, kind, range.start() + kind.opener_len(), range).parse()
|
||||
|
@ -433,7 +432,7 @@ pub(crate) fn parse_string_literal(
|
|||
|
||||
pub(crate) fn parse_fstring_literal_element(
|
||||
source: Box<str>,
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
range: TextRange,
|
||||
) -> Result<ast::FStringElement, LexicalError> {
|
||||
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()
|
||||
|
|
|
@ -1,395 +0,0 @@
|
|||
use std::fmt;
|
||||
|
||||
use bitflags::bitflags;
|
||||
|
||||
use ruff_python_ast::{str::Quote, ByteStringPrefix, FStringPrefix, StringLiteralPrefix};
|
||||
use ruff_text_size::{TextLen, TextSize};
|
||||
|
||||
bitflags! {
    /// Bit-level description of a string literal's opener: the quote
    /// character, whether the quotes are tripled, and which prefix letters
    /// come before them.
    ///
    /// Not every combination of `*_PREFIX` bits is meaningful -- `U_PREFIX`,
    /// for instance, must never be combined with another `*_PREFIX` bit.
    /// Prefer obtaining the prefix bits from `StringPrefix::as_flags()`
    /// instead of assembling them by hand.
    #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
    struct StringFlags: u8 {
        /// Set when the string is delimited by double quotes (`"`).
        /// When clear, the string is delimited by single quotes (`'`).
        const DOUBLE = 0b0000_0001;

        /// Set when the string is triple-quoted, i.e. it opens and closes
        /// with three consecutive quote characters.
        const TRIPLE_QUOTED = 0b0000_0010;

        /// Set when the string carries a `u` or `U` prefix.
        /// The prefix has no effect at runtime, but a string that has it
        /// cannot have any other prefix.
        const U_PREFIX = 0b0000_0100;

        /// Set when the string carries a `b` or `B` prefix.
        /// Such a string is a sequence of `int`s at runtime rather than a
        /// sequence of `str`s. It may additionally be raw, but may not
        /// carry any other prefix.
        const B_PREFIX = 0b0000_1000;

        /// Set when the string carries an `f` or `F` prefix, making it an
        /// f-string. It may additionally be raw, but may not carry any
        /// other prefix.
        const F_PREFIX = 0b0001_0000;

        /// Set when the string carries a lowercase `r` prefix, making it a
        /// raw string. F-strings, byte-strings and otherwise unprefixed
        /// strings may be raw; u-strings may not.
        const R_PREFIX_LOWER = 0b0010_0000;

        /// Set when the string carries an uppercase `R` prefix, making it a
        /// raw string. The casing of `r`/`R` makes no difference at runtime;
        /// see <https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings>
        /// for why the casing is tracked for this prefix and for no other.
        const R_PREFIX_UPPER = 0b0100_0000;
    }
}
|
||||
|
||||
/// Enumeration of all the possible valid prefixes
|
||||
/// prior to a Python string literal.
|
||||
///
|
||||
/// Using the `as_flags()` method on variants of this enum
|
||||
/// is the recommended way to set `*_PREFIX` flags from the
|
||||
/// `StringFlags` bitflag, as it means that you cannot accidentally
|
||||
/// set a combination of `*_PREFIX` flags that would be invalid
|
||||
/// at runtime in Python.
|
||||
///
|
||||
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
|
||||
/// [PEP 701]: https://peps.python.org/pep-0701/
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum StringPrefix {
|
||||
/// Prefixes that indicate the string is a bytestring
|
||||
Bytes(ByteStringPrefix),
|
||||
|
||||
/// Prefixes that indicate the string is an f-string
|
||||
Format(FStringPrefix),
|
||||
|
||||
/// All other prefixes
|
||||
Regular(StringLiteralPrefix),
|
||||
}
|
||||
|
||||
impl TryFrom<char> for StringPrefix {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(value: char) -> Result<Self, String> {
|
||||
let result = match value {
|
||||
'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }),
|
||||
'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }),
|
||||
'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode),
|
||||
'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular),
|
||||
'f' | 'F' => Self::Format(FStringPrefix::Regular),
|
||||
_ => return Err(format!("Unexpected prefix '{value}'")),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<[char; 2]> for StringPrefix {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(value: [char; 2]) -> Result<Self, String> {
|
||||
let result = match value {
|
||||
['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: false })
|
||||
}
|
||||
['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: true })
|
||||
}
|
||||
['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
|
||||
}
|
||||
['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
|
||||
}
|
||||
_ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl StringPrefix {
|
||||
const fn as_flags(self) -> StringFlags {
|
||||
match self {
|
||||
// regular strings
|
||||
Self::Regular(StringLiteralPrefix::Empty) => StringFlags::empty(),
|
||||
Self::Regular(StringLiteralPrefix::Unicode) => StringFlags::U_PREFIX,
|
||||
Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
|
||||
StringFlags::R_PREFIX_LOWER
|
||||
}
|
||||
Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
|
||||
StringFlags::R_PREFIX_UPPER
|
||||
}
|
||||
|
||||
// bytestrings
|
||||
Self::Bytes(ByteStringPrefix::Regular) => StringFlags::B_PREFIX,
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
|
||||
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_LOWER)
|
||||
}
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
|
||||
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_UPPER)
|
||||
}
|
||||
|
||||
// f-strings
|
||||
Self::Format(FStringPrefix::Regular) => StringFlags::F_PREFIX,
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: false }) => {
|
||||
StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_LOWER)
|
||||
}
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: true }) => {
|
||||
StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_UPPER)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fn from_kind(kind: StringKind) -> Self {
|
||||
let StringKind(flags) = kind;
|
||||
|
||||
// f-strings
|
||||
if flags.contains(StringFlags::F_PREFIX) {
|
||||
if flags.contains(StringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Format(FStringPrefix::Raw { uppercase_r: false });
|
||||
}
|
||||
if flags.contains(StringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Format(FStringPrefix::Raw { uppercase_r: true });
|
||||
}
|
||||
return Self::Format(FStringPrefix::Regular);
|
||||
}
|
||||
|
||||
// bytestrings
|
||||
if flags.contains(StringFlags::B_PREFIX) {
|
||||
if flags.contains(StringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
|
||||
}
|
||||
if flags.contains(StringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
|
||||
}
|
||||
return Self::Bytes(ByteStringPrefix::Regular);
|
||||
}
|
||||
|
||||
// all other strings
|
||||
if flags.contains(StringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Regular(StringLiteralPrefix::Raw { uppercase: false });
|
||||
}
|
||||
if flags.contains(StringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Regular(StringLiteralPrefix::Raw { uppercase: true });
|
||||
}
|
||||
if flags.contains(StringFlags::U_PREFIX) {
|
||||
return Self::Regular(StringLiteralPrefix::Unicode);
|
||||
}
|
||||
Self::Regular(StringLiteralPrefix::Empty)
|
||||
}
|
||||
|
||||
const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Regular(regular_prefix) => regular_prefix.as_str(),
|
||||
Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(),
|
||||
Self::Format(fstring_prefix) => fstring_prefix.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for StringPrefix {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for StringPrefix {
|
||||
fn default() -> Self {
|
||||
Self::Regular(StringLiteralPrefix::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct StringKind(StringFlags);
|
||||
|
||||
impl StringKind {
|
||||
pub(crate) const fn from_prefix(prefix: StringPrefix) -> Self {
|
||||
Self(prefix.as_flags())
|
||||
}
|
||||
|
||||
pub const fn prefix(self) -> StringPrefix {
|
||||
StringPrefix::from_kind(self)
|
||||
}
|
||||
|
||||
/// Does the string have a `u` or `U` prefix?
|
||||
pub const fn is_u_string(self) -> bool {
|
||||
self.0.contains(StringFlags::U_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string have an `r` or `R` prefix?
|
||||
pub const fn is_raw_string(self) -> bool {
|
||||
self.0
|
||||
.intersects(StringFlags::R_PREFIX_LOWER.union(StringFlags::R_PREFIX_UPPER))
|
||||
}
|
||||
|
||||
/// Does the string have an `f` or `F` prefix?
|
||||
pub const fn is_f_string(self) -> bool {
|
||||
self.0.contains(StringFlags::F_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string have a `b` or `B` prefix?
|
||||
pub const fn is_byte_string(self) -> bool {
|
||||
self.0.contains(StringFlags::B_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string use single or double quotes in its opener and closer?
|
||||
pub const fn quote_style(self) -> Quote {
|
||||
if self.0.contains(StringFlags::DOUBLE) {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
}
|
||||
}
|
||||
|
||||
/// Is the string triple-quoted, i.e.,
|
||||
/// does it begin and end with three consecutive quote characters?
|
||||
pub const fn is_triple_quoted(self) -> bool {
|
||||
self.0.contains(StringFlags::TRIPLE_QUOTED)
|
||||
}
|
||||
|
||||
/// A `str` representation of the quotes used to start and close.
|
||||
/// This does not include any prefixes the string has in its opener.
|
||||
pub const fn quote_str(self) -> &'static str {
|
||||
if self.is_triple_quoted() {
|
||||
match self.quote_style() {
|
||||
Quote::Single => "'''",
|
||||
Quote::Double => r#"""""#,
|
||||
}
|
||||
} else {
|
||||
match self.quote_style() {
|
||||
Quote::Single => "'",
|
||||
Quote::Double => "\"",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The length of the prefixes used (if any) in the string's opener.
|
||||
pub fn prefix_len(self) -> TextSize {
|
||||
self.prefix().as_str().text_len()
|
||||
}
|
||||
|
||||
/// The length of the quotes used to start and close the string.
|
||||
/// This does not include the length of any prefixes the string has
|
||||
/// in its opener.
|
||||
pub const fn quote_len(self) -> TextSize {
|
||||
if self.is_triple_quoted() {
|
||||
TextSize::new(3)
|
||||
} else {
|
||||
TextSize::new(1)
|
||||
}
|
||||
}
|
||||
|
||||
/// The total length of the string's opener,
|
||||
/// i.e., the length of the prefixes plus the length
|
||||
/// of the quotes used to open the string.
|
||||
pub fn opener_len(self) -> TextSize {
|
||||
self.prefix_len() + self.quote_len()
|
||||
}
|
||||
|
||||
/// The total length of the string's closer.
|
||||
/// This is always equal to `self.quote_len()`,
|
||||
/// but is provided here for symmetry with the `opener_len()` method.
|
||||
pub const fn closer_len(self) -> TextSize {
|
||||
self.quote_len()
|
||||
}
|
||||
|
||||
pub fn format_string_contents(self, contents: &str) -> String {
|
||||
format!(
|
||||
"{}{}{}{}",
|
||||
self.prefix(),
|
||||
self.quote_str(),
|
||||
contents,
|
||||
self.quote_str()
|
||||
)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_double_quotes(mut self) -> Self {
|
||||
self.0 |= StringFlags::DOUBLE;
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_triple_quotes(mut self) -> Self {
|
||||
self.0 |= StringFlags::TRIPLE_QUOTED;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for StringKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("StringKind")
|
||||
.field("prefix", &self.prefix())
|
||||
.field("triple_quoted", &self.is_triple_quoted())
|
||||
.field("quote_style", &self.quote_style())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringKind> for ruff_python_ast::StringLiteralFlags {
|
||||
fn from(value: StringKind) -> ruff_python_ast::StringLiteralFlags {
|
||||
let mut new = ruff_python_ast::StringLiteralFlags::default();
|
||||
if value.quote_style().is_double() {
|
||||
new = new.with_double_quotes();
|
||||
}
|
||||
if value.is_triple_quoted() {
|
||||
new = new.with_triple_quotes();
|
||||
}
|
||||
let StringPrefix::Regular(prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into a regular string",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
new.with_prefix(prefix)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringKind> for ruff_python_ast::BytesLiteralFlags {
|
||||
fn from(value: StringKind) -> ruff_python_ast::BytesLiteralFlags {
|
||||
let mut new = ruff_python_ast::BytesLiteralFlags::default();
|
||||
if value.quote_style().is_double() {
|
||||
new = new.with_double_quotes();
|
||||
}
|
||||
if value.is_triple_quoted() {
|
||||
new = new.with_triple_quotes();
|
||||
}
|
||||
let StringPrefix::Bytes(bytestring_prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into a bytestring",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
new.with_prefix(bytestring_prefix)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringKind> for ruff_python_ast::FStringFlags {
|
||||
fn from(value: StringKind) -> ruff_python_ast::FStringFlags {
|
||||
let mut new = ruff_python_ast::FStringFlags::default();
|
||||
if value.quote_style().is_double() {
|
||||
new = new.with_double_quotes();
|
||||
}
|
||||
if value.is_triple_quoted() {
|
||||
new = new.with_triple_quotes();
|
||||
}
|
||||
let StringPrefix::Format(fstring_prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into an f-string",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
new.with_prefix(fstring_prefix)
|
||||
}
|
||||
}
|
|
@ -4,10 +4,9 @@
|
|||
//! loosely based on the token definitions found in the [CPython source].
|
||||
//!
|
||||
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
|
||||
use crate::string_token_flags::StringKind;
|
||||
use crate::Mode;
|
||||
|
||||
use ruff_python_ast::{Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{AnyStringKind, Int, IpyEscapeKind};
|
||||
use std::fmt;
|
||||
|
||||
/// The set of tokens the Python source code can be tokenized in.
|
||||
|
@ -44,11 +43,11 @@ pub enum Tok {
|
|||
value: Box<str>,
|
||||
/// Flags that can be queried to determine the quote style
|
||||
/// and prefixes of the string
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
},
|
||||
/// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix
|
||||
/// and the opening quote(s).
|
||||
FStringStart(StringKind),
|
||||
FStringStart(AnyStringKind),
|
||||
/// Token value that includes the portion of text inside the f-string that's not
|
||||
/// part of the expression part and isn't an opening or closing brace.
|
||||
FStringMiddle {
|
||||
|
@ -56,7 +55,7 @@ pub enum Tok {
|
|||
value: Box<str>,
|
||||
/// Flags that can be queried to determine the quote style
|
||||
/// and prefixes of the string
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
},
|
||||
/// Token value for the end of an f-string. This includes the closing quote.
|
||||
FStringEnd,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue