mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:04 +00:00
Simplify formatting of strings by using flags from the AST nodes (#10489)
This commit is contained in:
parent
fc792d1d2e
commit
7caf0d064a
22 changed files with 725 additions and 809 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2251,7 +2251,6 @@ name = "ruff_python_formatter"
|
|||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bitflags 2.4.2",
|
||||
"clap",
|
||||
"countme",
|
||||
"insta",
|
||||
|
|
|
@ -2,14 +2,14 @@ use std::str::FromStr;
|
|||
|
||||
use ruff_diagnostics::{Diagnostic, Violation};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::Expr;
|
||||
use ruff_python_ast::{AnyStringKind, Expr};
|
||||
use ruff_python_literal::{
|
||||
cformat::{CFormatErrorType, CFormatString},
|
||||
format::FormatPart,
|
||||
format::FromTemplate,
|
||||
format::{FormatSpec, FormatSpecError, FormatString},
|
||||
};
|
||||
use ruff_python_parser::{lexer, Mode, StringKind, Tok};
|
||||
use ruff_python_parser::{lexer, Mode, Tok};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
|
@ -92,7 +92,7 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
|
|||
/// Ex) `"%z" % "1"`
|
||||
pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
|
||||
// Grab each string segment (in case there's an implicit concatenation).
|
||||
let mut strings: Vec<(TextRange, StringKind)> = vec![];
|
||||
let mut strings: Vec<(TextRange, AnyStringKind)> = vec![];
|
||||
for (tok, range) in
|
||||
lexer::lex_starts_at(checker.locator().slice(expr), Mode::Module, expr.start()).flatten()
|
||||
{
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
use std::str::FromStr;
|
||||
|
||||
use ruff_python_ast::{self as ast, Expr};
|
||||
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
|
||||
use ruff_python_literal::cformat::{CFormatPart, CFormatSpec, CFormatStrOrBytes, CFormatString};
|
||||
use ruff_python_parser::{lexer, AsMode, StringKind, Tok};
|
||||
use ruff_python_parser::{lexer, AsMode, Tok};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
|
@ -218,7 +218,7 @@ fn is_valid_dict(
|
|||
pub(crate) fn bad_string_format_type(checker: &mut Checker, expr: &Expr, right: &Expr) {
|
||||
// Grab each string segment (in case there's an implicit concatenation).
|
||||
let content = checker.locator().slice(expr);
|
||||
let mut strings: Vec<(TextRange, StringKind)> = vec![];
|
||||
let mut strings: Vec<(TextRange, AnyStringKind)> = vec![];
|
||||
for (tok, range) in
|
||||
lexer::lex_starts_at(content, checker.source_type.as_mode(), expr.start()).flatten()
|
||||
{
|
||||
|
|
|
@ -4,12 +4,12 @@ use std::str::FromStr;
|
|||
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::whitespace::indentation;
|
||||
use ruff_python_ast::{self as ast, Expr};
|
||||
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
|
||||
use ruff_python_codegen::Stylist;
|
||||
use ruff_python_literal::cformat::{
|
||||
CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString,
|
||||
};
|
||||
use ruff_python_parser::{lexer, AsMode, StringKind, Tok};
|
||||
use ruff_python_parser::{lexer, AsMode, Tok};
|
||||
use ruff_python_stdlib::identifiers::is_identifier;
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
@ -352,7 +352,7 @@ fn convertible(format_string: &CFormatString, params: &Expr) -> bool {
|
|||
/// UP031
|
||||
pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right: &Expr) {
|
||||
// Grab each string segment (in case there's an implicit concatenation).
|
||||
let mut strings: Vec<(TextRange, StringKind)> = vec![];
|
||||
let mut strings: Vec<(TextRange, AnyStringKind)> = vec![];
|
||||
let mut extension = None;
|
||||
for (tok, range) in lexer::lex_starts_at(
|
||||
checker.locator().slice(expr),
|
||||
|
|
|
@ -9,7 +9,7 @@ use std::slice::{Iter, IterMut};
|
|||
use bitflags::bitflags;
|
||||
use itertools::Itertools;
|
||||
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::{int, str::Quote, LiteralExpressionRef};
|
||||
|
||||
|
@ -1244,8 +1244,9 @@ pub struct FStringFlags(FStringFlagsInner);
|
|||
|
||||
impl FStringFlags {
|
||||
#[must_use]
|
||||
pub fn with_double_quotes(mut self) -> Self {
|
||||
self.0 |= FStringFlagsInner::DOUBLE;
|
||||
pub fn with_quote_style(mut self, quote_style: Quote) -> Self {
|
||||
self.0
|
||||
.set(FStringFlagsInner::DOUBLE, quote_style.is_double());
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -1256,18 +1257,16 @@ impl FStringFlags {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_prefix(self, prefix: FStringPrefix) -> Self {
|
||||
let FStringFlags(flags) = self;
|
||||
pub fn with_prefix(mut self, prefix: FStringPrefix) -> Self {
|
||||
match prefix {
|
||||
FStringPrefix::Regular => {
|
||||
Self(flags - FStringFlagsInner::R_PREFIX_LOWER - FStringFlagsInner::R_PREFIX_UPPER)
|
||||
Self(self.0 - FStringFlagsInner::R_PREFIX_LOWER - FStringFlagsInner::R_PREFIX_UPPER)
|
||||
}
|
||||
FStringPrefix::Raw { uppercase_r } => {
|
||||
self.0.set(FStringFlagsInner::R_PREFIX_UPPER, uppercase_r);
|
||||
self.0.set(FStringFlagsInner::R_PREFIX_LOWER, !uppercase_r);
|
||||
self
|
||||
}
|
||||
FStringPrefix::Raw { uppercase_r: true } => Self(
|
||||
(flags | FStringFlagsInner::R_PREFIX_UPPER) - FStringFlagsInner::R_PREFIX_LOWER,
|
||||
),
|
||||
FStringPrefix::Raw { uppercase_r: false } => Self(
|
||||
(flags | FStringFlagsInner::R_PREFIX_LOWER) - FStringFlagsInner::R_PREFIX_UPPER,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1582,8 +1581,9 @@ pub struct StringLiteralFlags(StringLiteralFlagsInner);
|
|||
|
||||
impl StringLiteralFlags {
|
||||
#[must_use]
|
||||
pub fn with_double_quotes(mut self) -> Self {
|
||||
self.0 |= StringLiteralFlagsInner::DOUBLE;
|
||||
pub fn with_quote_style(mut self, quote_style: Quote) -> Self {
|
||||
self.0
|
||||
.set(StringLiteralFlagsInner::DOUBLE, quote_style.is_double());
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -1996,8 +1996,9 @@ pub struct BytesLiteralFlags(BytesLiteralFlagsInner);
|
|||
|
||||
impl BytesLiteralFlags {
|
||||
#[must_use]
|
||||
pub fn with_double_quotes(mut self) -> Self {
|
||||
self.0 |= BytesLiteralFlagsInner::DOUBLE;
|
||||
pub fn with_quote_style(mut self, quote_style: Quote) -> Self {
|
||||
self.0
|
||||
.set(BytesLiteralFlagsInner::DOUBLE, quote_style.is_double());
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -2008,23 +2009,20 @@ impl BytesLiteralFlags {
|
|||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_prefix(self, prefix: ByteStringPrefix) -> Self {
|
||||
let BytesLiteralFlags(flags) = self;
|
||||
pub fn with_prefix(mut self, prefix: ByteStringPrefix) -> Self {
|
||||
match prefix {
|
||||
ByteStringPrefix::Regular => Self(
|
||||
flags
|
||||
- BytesLiteralFlagsInner::R_PREFIX_LOWER
|
||||
- BytesLiteralFlagsInner::R_PREFIX_UPPER,
|
||||
),
|
||||
ByteStringPrefix::Raw { uppercase_r: true } => Self(
|
||||
(flags | BytesLiteralFlagsInner::R_PREFIX_UPPER)
|
||||
- BytesLiteralFlagsInner::R_PREFIX_LOWER,
|
||||
),
|
||||
ByteStringPrefix::Raw { uppercase_r: false } => Self(
|
||||
(flags | BytesLiteralFlagsInner::R_PREFIX_LOWER)
|
||||
- BytesLiteralFlagsInner::R_PREFIX_UPPER,
|
||||
),
|
||||
}
|
||||
ByteStringPrefix::Regular => {
|
||||
self.0 -= BytesLiteralFlagsInner::R_PREFIX_LOWER;
|
||||
self.0 -= BytesLiteralFlagsInner::R_PREFIX_UPPER;
|
||||
}
|
||||
ByteStringPrefix::Raw { uppercase_r } => {
|
||||
self.0
|
||||
.set(BytesLiteralFlagsInner::R_PREFIX_UPPER, uppercase_r);
|
||||
self.0
|
||||
.set(BytesLiteralFlagsInner::R_PREFIX_LOWER, !uppercase_r);
|
||||
}
|
||||
};
|
||||
self
|
||||
}
|
||||
|
||||
pub const fn prefix(self) -> ByteStringPrefix {
|
||||
|
@ -2108,6 +2106,439 @@ impl From<BytesLiteral> for Expr {
|
|||
}
|
||||
}
|
||||
|
||||
bitflags! {
    /// Flags that can be queried to obtain information
    /// regarding the prefixes and quotes used for a string literal.
    ///
    /// Note that not all of these flags can be validly combined -- e.g.,
    /// it is invalid to combine the `U_PREFIX` flag with any other
    /// of the `*_PREFIX` flags. As such, the recommended way to set the
    /// prefix flags is by calling the `as_flags()` method on the
    /// `AnyStringPrefix` enum.
    #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
    struct AnyStringFlags: u8 {
        /// The string uses double quotes (`"`).
        /// If this flag is not set, the string uses single quotes (`'`).
        const DOUBLE = 1 << 0;

        /// The string is triple-quoted:
        /// it begins and ends with three consecutive quote characters.
        const TRIPLE_QUOTED = 1 << 1;

        /// The string has a `u` or `U` prefix.
        /// While this prefix is a no-op at runtime,
        /// strings with this prefix can have no other prefixes set.
        const U_PREFIX = 1 << 2;

        /// The string has a `b` or `B` prefix.
        /// This means that the string is a sequence of `int`s at runtime,
        /// rather than a sequence of `str`s.
        /// Strings with this flag can also be raw strings,
        /// but can have no other prefixes.
        const B_PREFIX = 1 << 3;

        /// The string has a `f` or `F` prefix, meaning it is an f-string.
        /// F-strings can also be raw strings,
        /// but can have no other prefixes.
        const F_PREFIX = 1 << 4;

        /// The string has an `r` prefix, meaning it is a raw string.
        /// F-strings and byte-strings can be raw,
        /// as can strings with no other prefixes.
        /// U-strings cannot be raw.
        const R_PREFIX_LOWER = 1 << 5;

        /// The string has an `R` prefix, meaning it is a raw string.
        /// The casing of the `r`/`R` has no semantic significance at runtime;
        /// see <https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings>
        /// for why we track the casing of the `r` prefix,
        /// but not for any other prefix.
        const R_PREFIX_UPPER = 1 << 6;
    }
}
|
||||
|
||||
/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `AnyStringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// See [String and Bytes literals] and [PEP 701].
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyStringPrefix {
    /// Prefixes that indicate the string is a bytestring
    Bytes(ByteStringPrefix),

    /// Prefixes that indicate the string is an f-string
    Format(FStringPrefix),

    /// All other prefixes
    Regular(StringLiteralPrefix),
}
|
||||
|
||||
impl TryFrom<char> for AnyStringPrefix {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(value: char) -> Result<Self, String> {
|
||||
let result = match value {
|
||||
'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }),
|
||||
'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }),
|
||||
'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode),
|
||||
'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular),
|
||||
'f' | 'F' => Self::Format(FStringPrefix::Regular),
|
||||
_ => return Err(format!("Unexpected prefix '{value}'")),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<[char; 2]> for AnyStringPrefix {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(value: [char; 2]) -> Result<Self, String> {
|
||||
let result = match value {
|
||||
['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: false })
|
||||
}
|
||||
['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: true })
|
||||
}
|
||||
['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
|
||||
}
|
||||
['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
|
||||
}
|
||||
_ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl AnyStringPrefix {
|
||||
const fn as_flags(self) -> AnyStringFlags {
|
||||
match self {
|
||||
// regular strings
|
||||
Self::Regular(StringLiteralPrefix::Empty) => AnyStringFlags::empty(),
|
||||
Self::Regular(StringLiteralPrefix::Unicode) => AnyStringFlags::U_PREFIX,
|
||||
Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
|
||||
AnyStringFlags::R_PREFIX_LOWER
|
||||
}
|
||||
Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
|
||||
AnyStringFlags::R_PREFIX_UPPER
|
||||
}
|
||||
|
||||
// bytestrings
|
||||
Self::Bytes(ByteStringPrefix::Regular) => AnyStringFlags::B_PREFIX,
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
|
||||
AnyStringFlags::B_PREFIX.union(AnyStringFlags::R_PREFIX_LOWER)
|
||||
}
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
|
||||
AnyStringFlags::B_PREFIX.union(AnyStringFlags::R_PREFIX_UPPER)
|
||||
}
|
||||
|
||||
// f-strings
|
||||
Self::Format(FStringPrefix::Regular) => AnyStringFlags::F_PREFIX,
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: false }) => {
|
||||
AnyStringFlags::F_PREFIX.union(AnyStringFlags::R_PREFIX_LOWER)
|
||||
}
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: true }) => {
|
||||
AnyStringFlags::F_PREFIX.union(AnyStringFlags::R_PREFIX_UPPER)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fn from_kind(kind: AnyStringKind) -> Self {
|
||||
let AnyStringKind(flags) = kind;
|
||||
|
||||
// f-strings
|
||||
if flags.contains(AnyStringFlags::F_PREFIX) {
|
||||
if flags.contains(AnyStringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Format(FStringPrefix::Raw { uppercase_r: false });
|
||||
}
|
||||
if flags.contains(AnyStringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Format(FStringPrefix::Raw { uppercase_r: true });
|
||||
}
|
||||
return Self::Format(FStringPrefix::Regular);
|
||||
}
|
||||
|
||||
// bytestrings
|
||||
if flags.contains(AnyStringFlags::B_PREFIX) {
|
||||
if flags.contains(AnyStringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
|
||||
}
|
||||
if flags.contains(AnyStringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
|
||||
}
|
||||
return Self::Bytes(ByteStringPrefix::Regular);
|
||||
}
|
||||
|
||||
// all other strings
|
||||
if flags.contains(AnyStringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Regular(StringLiteralPrefix::Raw { uppercase: false });
|
||||
}
|
||||
if flags.contains(AnyStringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Regular(StringLiteralPrefix::Raw { uppercase: true });
|
||||
}
|
||||
if flags.contains(AnyStringFlags::U_PREFIX) {
|
||||
return Self::Regular(StringLiteralPrefix::Unicode);
|
||||
}
|
||||
Self::Regular(StringLiteralPrefix::Empty)
|
||||
}
|
||||
|
||||
pub const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Regular(regular_prefix) => regular_prefix.as_str(),
|
||||
Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(),
|
||||
Self::Format(fstring_prefix) => fstring_prefix.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for AnyStringPrefix {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for AnyStringPrefix {
|
||||
fn default() -> Self {
|
||||
Self::Regular(StringLiteralPrefix::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
/// Prefix and quote information for any kind of string
/// (regular string, bytestring or f-string), stored as a set of
/// `AnyStringFlags`.
///
/// The default value has no flags set: no prefix, single quotes,
/// not triple-quoted.
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AnyStringKind(AnyStringFlags);
|
||||
|
||||
impl AnyStringKind {
|
||||
#[must_use]
|
||||
pub fn with_prefix(mut self, prefix: AnyStringPrefix) -> Self {
|
||||
self.0 |= prefix.as_flags();
|
||||
self
|
||||
}
|
||||
|
||||
pub const fn prefix(self) -> AnyStringPrefix {
|
||||
AnyStringPrefix::from_kind(self)
|
||||
}
|
||||
|
||||
pub fn new(prefix: AnyStringPrefix, quotes: Quote, triple_quoted: bool) -> Self {
|
||||
let new = Self::default().with_prefix(prefix).with_quote_style(quotes);
|
||||
if triple_quoted {
|
||||
new.with_triple_quotes()
|
||||
} else {
|
||||
new
|
||||
}
|
||||
}
|
||||
|
||||
/// Does the string have a `u` or `U` prefix?
|
||||
pub const fn is_u_string(self) -> bool {
|
||||
self.0.contains(AnyStringFlags::U_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string have an `r` or `R` prefix?
|
||||
pub const fn is_raw_string(self) -> bool {
|
||||
self.0
|
||||
.intersects(AnyStringFlags::R_PREFIX_LOWER.union(AnyStringFlags::R_PREFIX_UPPER))
|
||||
}
|
||||
|
||||
/// Does the string have an `f` or `F` prefix?
|
||||
pub const fn is_f_string(self) -> bool {
|
||||
self.0.contains(AnyStringFlags::F_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string have a `b` or `B` prefix?
|
||||
pub const fn is_byte_string(self) -> bool {
|
||||
self.0.contains(AnyStringFlags::B_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string use single or double quotes in its opener and closer?
|
||||
pub const fn quote_style(self) -> Quote {
|
||||
if self.0.contains(AnyStringFlags::DOUBLE) {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
}
|
||||
}
|
||||
|
||||
/// Is the string triple-quoted, i.e.,
|
||||
/// does it begin and end with three consecutive quote characters?
|
||||
pub const fn is_triple_quoted(self) -> bool {
|
||||
self.0.contains(AnyStringFlags::TRIPLE_QUOTED)
|
||||
}
|
||||
|
||||
/// A `str` representation of the quotes used to start and close.
|
||||
/// This does not include any prefixes the string has in its opener.
|
||||
pub const fn quote_str(self) -> &'static str {
|
||||
if self.is_triple_quoted() {
|
||||
match self.quote_style() {
|
||||
Quote::Single => "'''",
|
||||
Quote::Double => r#"""""#,
|
||||
}
|
||||
} else {
|
||||
match self.quote_style() {
|
||||
Quote::Single => "'",
|
||||
Quote::Double => "\"",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The length of the prefixes used (if any) in the string's opener.
|
||||
pub fn prefix_len(self) -> TextSize {
|
||||
self.prefix().as_str().text_len()
|
||||
}
|
||||
|
||||
/// The length of the quotes used to start and close the string.
|
||||
/// This does not include the length of any prefixes the string has
|
||||
/// in its opener.
|
||||
pub const fn quote_len(self) -> TextSize {
|
||||
if self.is_triple_quoted() {
|
||||
TextSize::new(3)
|
||||
} else {
|
||||
TextSize::new(1)
|
||||
}
|
||||
}
|
||||
|
||||
/// The total length of the string's opener,
|
||||
/// i.e., the length of the prefixes plus the length
|
||||
/// of the quotes used to open the string.
|
||||
pub fn opener_len(self) -> TextSize {
|
||||
self.prefix_len() + self.quote_len()
|
||||
}
|
||||
|
||||
/// The total length of the string's closer.
|
||||
/// This is always equal to `self.quote_len()`,
|
||||
/// but is provided here for symmetry with the `opener_len()` method.
|
||||
pub const fn closer_len(self) -> TextSize {
|
||||
self.quote_len()
|
||||
}
|
||||
|
||||
pub fn format_string_contents(self, contents: &str) -> String {
|
||||
format!(
|
||||
"{}{}{}{}",
|
||||
self.prefix(),
|
||||
self.quote_str(),
|
||||
contents,
|
||||
self.quote_str()
|
||||
)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_quote_style(mut self, quotes: Quote) -> Self {
|
||||
match quotes {
|
||||
Quote::Double => self.0 |= AnyStringFlags::DOUBLE,
|
||||
Quote::Single => self.0 -= AnyStringFlags::DOUBLE,
|
||||
};
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_triple_quotes(mut self) -> Self {
|
||||
self.0 |= AnyStringFlags::TRIPLE_QUOTED;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for AnyStringKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("StringKind")
|
||||
.field("prefix", &self.prefix())
|
||||
.field("triple_quoted", &self.is_triple_quoted())
|
||||
.field("quote_style", &self.quote_style())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AnyStringKind> for StringLiteralFlags {
|
||||
fn from(value: AnyStringKind) -> StringLiteralFlags {
|
||||
let AnyStringPrefix::Regular(prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into a regular string",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
let new = StringLiteralFlags::default()
|
||||
.with_quote_style(value.quote_style())
|
||||
.with_prefix(prefix);
|
||||
if value.is_triple_quoted() {
|
||||
new.with_triple_quotes()
|
||||
} else {
|
||||
new
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringLiteralFlags> for AnyStringKind {
|
||||
fn from(value: StringLiteralFlags) -> Self {
|
||||
Self::new(
|
||||
AnyStringPrefix::Regular(value.prefix()),
|
||||
value.quote_style(),
|
||||
value.is_triple_quoted(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AnyStringKind> for BytesLiteralFlags {
|
||||
fn from(value: AnyStringKind) -> BytesLiteralFlags {
|
||||
let AnyStringPrefix::Bytes(bytestring_prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into a bytestring",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
let new = BytesLiteralFlags::default()
|
||||
.with_quote_style(value.quote_style())
|
||||
.with_prefix(bytestring_prefix);
|
||||
if value.is_triple_quoted() {
|
||||
new.with_triple_quotes()
|
||||
} else {
|
||||
new
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BytesLiteralFlags> for AnyStringKind {
|
||||
fn from(value: BytesLiteralFlags) -> Self {
|
||||
Self::new(
|
||||
AnyStringPrefix::Bytes(value.prefix()),
|
||||
value.quote_style(),
|
||||
value.is_triple_quoted(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AnyStringKind> for FStringFlags {
|
||||
fn from(value: AnyStringKind) -> FStringFlags {
|
||||
let AnyStringPrefix::Format(fstring_prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into an f-string",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
let new = FStringFlags::default()
|
||||
.with_quote_style(value.quote_style())
|
||||
.with_prefix(fstring_prefix);
|
||||
if value.is_triple_quoted() {
|
||||
new.with_triple_quotes()
|
||||
} else {
|
||||
new
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FStringFlags> for AnyStringKind {
|
||||
fn from(value: FStringFlags) -> Self {
|
||||
Self::new(
|
||||
AnyStringPrefix::Format(value.prefix()),
|
||||
value.quote_style(),
|
||||
value.is_triple_quoted(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct ExprNumberLiteral {
|
||||
pub range: TextRange,
|
||||
|
|
|
@ -25,7 +25,6 @@ ruff_python_parser = { path = "../ruff_python_parser" }
|
|||
ruff_text_size = { path = "../ruff_text_size" }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
bitflags = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
countme = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
use ruff_python_ast::BytesLiteral;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{StringNormalizer, StringPart};
|
||||
use crate::string::StringNormalizer;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct FormatBytesLiteral;
|
||||
|
@ -13,7 +12,7 @@ impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
|
|||
|
||||
StringNormalizer::from_context(f.context())
|
||||
.with_preferred_quote_style(f.options().quote_style())
|
||||
.normalize(&StringPart::from_source(item.range(), &locator), &locator)
|
||||
.normalize(item.into(), &locator)
|
||||
.fmt(f)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
use ruff_formatter::write;
|
||||
use ruff_python_ast::FString;
|
||||
use ruff_python_ast::{AnyStringKind, FString};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_f_string_formatting_enabled;
|
||||
use crate::string::{Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes};
|
||||
use crate::string::{Quoting, StringNormalizer, StringQuotes};
|
||||
|
||||
use super::f_string_element::FormatFStringElement;
|
||||
|
||||
|
@ -30,8 +29,6 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let locator = f.context().locator();
|
||||
|
||||
let string = StringPart::from_source(self.value.range(), &locator);
|
||||
|
||||
let normalizer = StringNormalizer::from_context(f.context())
|
||||
.with_quoting(self.quoting)
|
||||
.with_preferred_quote_style(f.options().quote_style());
|
||||
|
@ -39,7 +36,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
// If f-string formatting is disabled (not in preview), then we will
|
||||
// fall back to the previous behavior of normalizing the f-string.
|
||||
if !is_f_string_formatting_enabled(f.context()) {
|
||||
let result = normalizer.normalize(&string, &locator).fmt(f);
|
||||
let result = normalizer.normalize(self.value.into(), &locator).fmt(f);
|
||||
let comments = f.context().comments();
|
||||
self.value.elements.iter().for_each(|value| {
|
||||
comments.mark_verbatim_node_comments_formatted(value.into());
|
||||
|
@ -59,16 +56,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
return result;
|
||||
}
|
||||
|
||||
let quote_selection = normalizer.choose_quotes(&string, &locator);
|
||||
let string_kind = normalizer.choose_quotes(self.value.into(), &locator).kind();
|
||||
|
||||
let context = FStringContext::new(
|
||||
string.prefix(),
|
||||
quote_selection.quotes(),
|
||||
string_kind,
|
||||
FStringLayout::from_f_string(self.value, &locator),
|
||||
);
|
||||
|
||||
// Starting prefix and quote
|
||||
write!(f, [string.prefix(), quote_selection.quotes()])?;
|
||||
let quotes = StringQuotes::from(string_kind);
|
||||
write!(f, [string_kind.prefix(), quotes])?;
|
||||
|
||||
f.join()
|
||||
.entries(
|
||||
|
@ -80,32 +77,23 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
|
|||
.finish()?;
|
||||
|
||||
// Ending quote
|
||||
quote_selection.quotes().fmt(f)
|
||||
quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct FStringContext {
|
||||
prefix: StringPrefix,
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
layout: FStringLayout,
|
||||
}
|
||||
|
||||
impl FStringContext {
|
||||
const fn new(prefix: StringPrefix, quotes: StringQuotes, layout: FStringLayout) -> Self {
|
||||
Self {
|
||||
prefix,
|
||||
quotes,
|
||||
layout,
|
||||
}
|
||||
const fn new(kind: AnyStringKind, layout: FStringLayout) -> Self {
|
||||
Self { kind, layout }
|
||||
}
|
||||
|
||||
pub(crate) const fn quotes(self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
pub(crate) const fn prefix(self) -> StringPrefix {
|
||||
self.prefix
|
||||
pub(crate) fn kind(self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
pub(crate) const fn layout(self) -> FStringLayout {
|
||||
|
|
|
@ -56,13 +56,7 @@ impl<'a> FormatFStringLiteralElement<'a> {
|
|||
impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let literal_content = f.context().locator().slice(self.element.range());
|
||||
let normalized = normalize_string(
|
||||
literal_content,
|
||||
0,
|
||||
self.context.quotes(),
|
||||
self.context.prefix(),
|
||||
true,
|
||||
);
|
||||
let normalized = normalize_string(literal_content, 0, self.context.kind(), true);
|
||||
match &normalized {
|
||||
Cow::Borrowed(_) => source_text_slice(self.element.range()).fmt(f),
|
||||
Cow::Owned(normalized) => text(normalized).fmt(f),
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
use ruff_python_ast::StringLiteral;
|
||||
use ruff_text_size::Ranged;
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::string::{docstring, Quoting, StringNormalizer, StringPart};
|
||||
use crate::string::{docstring, Quoting, StringNormalizer};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct FormatStringLiteral<'a> {
|
||||
|
@ -61,10 +60,7 @@ impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
|
|||
let normalized = StringNormalizer::from_context(f.context())
|
||||
.with_quoting(self.layout.quoting())
|
||||
.with_preferred_quote_style(quote_style)
|
||||
.normalize(
|
||||
&StringPart::from_source(self.value.range(), &locator),
|
||||
&locator,
|
||||
);
|
||||
.normalize(self.value.into(), &locator);
|
||||
|
||||
if self.layout.is_docstring() {
|
||||
docstring::format(&normalized, f)
|
||||
|
|
|
@ -3,17 +3,17 @@ use std::iter::FusedIterator;
|
|||
use memchr::memchr2;
|
||||
|
||||
use ruff_python_ast::{
|
||||
self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
|
||||
StringLiteral,
|
||||
self as ast, AnyNodeRef, AnyStringKind, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral,
|
||||
ExpressionRef, StringLiteral,
|
||||
};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::expression::expr_f_string::f_string_quoting;
|
||||
use crate::other::f_string::FormatFString;
|
||||
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
|
||||
use crate::prelude::*;
|
||||
use crate::string::{Quoting, StringPrefix, StringQuotes};
|
||||
use crate::string::Quoting;
|
||||
|
||||
/// Represents any kind of string expression. This could be either a string,
|
||||
/// bytes or f-string.
|
||||
|
@ -70,14 +70,10 @@ impl<'a> AnyString<'a> {
|
|||
pub(crate) fn is_multiline(self, source: &str) -> bool {
|
||||
match self {
|
||||
AnyString::String(_) | AnyString::Bytes(_) => {
|
||||
let contents = &source[self.range()];
|
||||
let prefix = StringPrefix::parse(contents);
|
||||
let quotes = StringQuotes::parse(
|
||||
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
|
||||
);
|
||||
|
||||
quotes.is_some_and(StringQuotes::is_triple)
|
||||
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
|
||||
self.parts(Quoting::default())
|
||||
.next()
|
||||
.is_some_and(|part| part.kind().is_triple_quoted())
|
||||
&& memchr2(b'\n', b'\r', source[self.range()].as_bytes()).is_some()
|
||||
}
|
||||
AnyString::FString(fstring) => {
|
||||
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
|
||||
|
@ -179,6 +175,16 @@ pub(super) enum AnyStringPart<'a> {
|
|||
},
|
||||
}
|
||||
|
||||
impl AnyStringPart<'_> {
|
||||
fn kind(&self) -> AnyStringKind {
|
||||
match self {
|
||||
Self::String { part, .. } => part.flags.into(),
|
||||
Self::Bytes(bytes_literal) => bytes_literal.flags.into(),
|
||||
Self::FString { part, .. } => part.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
|
||||
fn from(value: &AnyStringPart<'a>) -> Self {
|
||||
match value {
|
||||
|
|
|
@ -18,6 +18,7 @@ use {
|
|||
ruff_text_size::{Ranged, TextLen, TextRange, TextSize},
|
||||
};
|
||||
|
||||
use crate::string::StringQuotes;
|
||||
use crate::{prelude::*, DocstringCodeLineWidth, FormatModuleError};
|
||||
|
||||
use super::NormalizedString;
|
||||
|
@ -126,7 +127,9 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
let mut lines = docstring.split('\n').peekable();
|
||||
|
||||
// Start the string
|
||||
write!(f, [normalized.prefix(), normalized.quotes()])?;
|
||||
let kind = normalized.kind();
|
||||
let quotes = StringQuotes::from(kind);
|
||||
write!(f, [kind.prefix(), quotes])?;
|
||||
// We track where in the source docstring we are (in source code byte offsets)
|
||||
let mut offset = normalized.start();
|
||||
|
||||
|
@ -142,7 +145,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
|
||||
// Edge case: The first line is `""" "content`, so we need to insert a chaperone space that keeps
|
||||
// inner quotes and closing quotes from getting too close to avoid `""""content`
|
||||
if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
|
||||
if trim_both.starts_with(quotes.quote_char.as_char()) {
|
||||
space().fmt(f)?;
|
||||
}
|
||||
|
||||
|
@ -169,7 +172,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
{
|
||||
space().fmt(f)?;
|
||||
}
|
||||
normalized.quotes().fmt(f)?;
|
||||
quotes.fmt(f)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
@ -195,7 +198,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
offset,
|
||||
stripped_indentation,
|
||||
already_normalized,
|
||||
quote_char: normalized.quotes().quote_char,
|
||||
quote_char: quotes.quote_char,
|
||||
code_example: CodeExample::default(),
|
||||
}
|
||||
.add_iter(lines)?;
|
||||
|
@ -208,7 +211,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
|
|||
space().fmt(f)?;
|
||||
}
|
||||
|
||||
write!(f, [normalized.quotes()])
|
||||
write!(f, [quotes])
|
||||
}
|
||||
|
||||
fn contains_unescaped_newline(haystack: &str) -> bool {
|
||||
|
@ -1570,7 +1573,7 @@ fn docstring_format_source(
|
|||
/// that avoids `content""""` and `content\"""`. This only applies to un-escaped backslashes,
|
||||
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
|
||||
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
|
||||
trim_end.ends_with(normalized.quotes().quote_char.as_char())
|
||||
trim_end.ends_with(normalized.kind().quote_style().as_char())
|
||||
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
|
||||
}
|
||||
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
use bitflags::bitflags;
|
||||
|
||||
pub(crate) use any::AnyString;
|
||||
pub(crate) use normalize::{normalize_string, NormalizedString, StringNormalizer};
|
||||
use ruff_formatter::format_args;
|
||||
use ruff_python_ast::str::Quote;
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, AnyStringKind, AnyStringPrefix};
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::comments::{leading_comments, trailing_comments};
|
||||
use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
|
||||
|
@ -55,132 +53,17 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct StringPart {
|
||||
/// The prefix.
|
||||
prefix: StringPrefix,
|
||||
|
||||
/// The actual quotes of the string in the source
|
||||
quotes: StringQuotes,
|
||||
|
||||
/// The range of the string's content (full range minus quotes and prefix)
|
||||
content_range: TextRange,
|
||||
}
|
||||
|
||||
impl StringPart {
|
||||
pub(crate) fn from_source(range: TextRange, locator: &Locator) -> Self {
|
||||
let string_content = locator.slice(range);
|
||||
|
||||
let prefix = StringPrefix::parse(string_content);
|
||||
let after_prefix = &string_content[usize::from(prefix.text_len())..];
|
||||
|
||||
let quotes =
|
||||
StringQuotes::parse(after_prefix).expect("Didn't find string quotes after prefix");
|
||||
let relative_raw_content_range = TextRange::new(
|
||||
prefix.text_len() + quotes.text_len(),
|
||||
string_content.text_len() - quotes.text_len(),
|
||||
);
|
||||
let raw_content_range = relative_raw_content_range + range.start();
|
||||
|
||||
Self {
|
||||
prefix,
|
||||
content_range: raw_content_range,
|
||||
quotes,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the prefix of the string part.
|
||||
pub(crate) const fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
}
|
||||
|
||||
/// Returns the surrounding quotes of the string part.
|
||||
pub(crate) const fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
/// Returns the range of the string's content in the source (minus prefix and quotes).
|
||||
pub(crate) const fn content_range(&self) -> TextRange {
|
||||
self.content_range
|
||||
}
|
||||
}
|
||||
|
||||
bitflags! {
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct StringPrefix: u8 {
|
||||
const UNICODE = 0b0000_0001;
|
||||
/// `r"test"`
|
||||
const RAW = 0b0000_0010;
|
||||
/// `R"test"`
|
||||
const RAW_UPPER = 0b0000_0100;
|
||||
const BYTE = 0b0000_1000;
|
||||
const F_STRING = 0b0001_0000;
|
||||
}
|
||||
}
|
||||
|
||||
impl StringPrefix {
|
||||
pub(crate) fn parse(input: &str) -> StringPrefix {
|
||||
let chars = input.chars();
|
||||
let mut prefix = StringPrefix::empty();
|
||||
|
||||
for c in chars {
|
||||
let flag = match c {
|
||||
'u' | 'U' => StringPrefix::UNICODE,
|
||||
'f' | 'F' => StringPrefix::F_STRING,
|
||||
'b' | 'B' => StringPrefix::BYTE,
|
||||
'r' => StringPrefix::RAW,
|
||||
'R' => StringPrefix::RAW_UPPER,
|
||||
'\'' | '"' => break,
|
||||
c => {
|
||||
unreachable!(
|
||||
"Unexpected character '{c}' terminating the prefix of a string literal"
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
prefix |= flag;
|
||||
}
|
||||
|
||||
prefix
|
||||
}
|
||||
|
||||
pub(crate) const fn text_len(self) -> TextSize {
|
||||
TextSize::new(self.bits().count_ones())
|
||||
}
|
||||
|
||||
pub(super) const fn is_raw_string(self) -> bool {
|
||||
self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
|
||||
}
|
||||
|
||||
pub(super) const fn is_fstring(self) -> bool {
|
||||
self.contains(StringPrefix::F_STRING)
|
||||
}
|
||||
|
||||
pub(super) const fn is_byte(self) -> bool {
|
||||
self.contains(StringPrefix::BYTE)
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for StringPrefix {
|
||||
impl Format<PyFormatContext<'_>> for AnyStringPrefix {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
// Retain the casing for the raw prefix:
|
||||
// https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
|
||||
if self.contains(StringPrefix::RAW) {
|
||||
token("r").fmt(f)?;
|
||||
} else if self.contains(StringPrefix::RAW_UPPER) {
|
||||
token("R").fmt(f)?;
|
||||
}
|
||||
|
||||
if self.contains(StringPrefix::BYTE) {
|
||||
token("b").fmt(f)?;
|
||||
}
|
||||
|
||||
if self.contains(StringPrefix::F_STRING) {
|
||||
token("f").fmt(f)?;
|
||||
}
|
||||
|
||||
// Remove the unicode prefix `u` if any because it is meaningless in Python 3+.
|
||||
|
||||
if !matches!(
|
||||
self,
|
||||
AnyStringPrefix::Regular(
|
||||
ast::StringLiteralPrefix::Empty | ast::StringLiteralPrefix::Unicode
|
||||
)
|
||||
) {
|
||||
token(self.as_str()).fmt(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -191,34 +74,6 @@ pub(crate) struct StringQuotes {
|
|||
quote_char: Quote,
|
||||
}
|
||||
|
||||
impl StringQuotes {
|
||||
pub(crate) fn parse(input: &str) -> Option<StringQuotes> {
|
||||
let mut chars = input.chars();
|
||||
|
||||
let quote_char = chars.next()?;
|
||||
let quote = Quote::try_from(quote_char).ok()?;
|
||||
|
||||
let triple = chars.next() == Some(quote_char) && chars.next() == Some(quote_char);
|
||||
|
||||
Some(Self {
|
||||
triple,
|
||||
quote_char: quote,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) const fn is_triple(self) -> bool {
|
||||
self.triple
|
||||
}
|
||||
|
||||
const fn text_len(self) -> TextSize {
|
||||
if self.triple {
|
||||
TextSize::new(3)
|
||||
} else {
|
||||
TextSize::new(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Format<PyFormatContext<'_>> for StringQuotes {
|
||||
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
|
||||
let quotes = match (self.quote_char, self.triple) {
|
||||
|
@ -232,6 +87,15 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<AnyStringKind> for StringQuotes {
|
||||
fn from(value: AnyStringKind) -> Self {
|
||||
Self {
|
||||
triple: value.is_triple_quoted(),
|
||||
quote_char: value.quote_style(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<QuoteStyle> for Quote {
|
||||
type Error = ();
|
||||
|
||||
|
@ -252,3 +116,58 @@ impl From<Quote> for QuoteStyle {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) struct StringPart {
|
||||
kind: AnyStringKind,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl Ranged for StringPart {
|
||||
fn range(&self) -> TextRange {
|
||||
self.range
|
||||
}
|
||||
}
|
||||
|
||||
impl StringPart {
|
||||
/// Use the `kind()` method to retrieve information about the string's prefix and quotes.
|
||||
fn kind(self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
/// Returns the range of the string's content in the source (minus prefix and quotes).
|
||||
fn content_range(self) -> TextRange {
|
||||
let kind = self.kind();
|
||||
TextRange::new(
|
||||
self.start() + kind.opener_len(),
|
||||
self.end() - kind.closer_len(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ast::StringLiteral> for StringPart {
|
||||
fn from(value: &ast::StringLiteral) -> Self {
|
||||
Self {
|
||||
range: value.range,
|
||||
kind: value.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ast::BytesLiteral> for StringPart {
|
||||
fn from(value: &ast::BytesLiteral) -> Self {
|
||||
Self {
|
||||
range: value.range,
|
||||
kind: value.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&ast::FString> for StringPart {
|
||||
fn from(value: &ast::FString) -> Self {
|
||||
Self {
|
||||
range: value.range,
|
||||
kind: value.flags.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::borrow::Cow;
|
|||
use std::iter::FusedIterator;
|
||||
|
||||
use ruff_formatter::FormatContext;
|
||||
use ruff_python_ast::str::Quote;
|
||||
use ruff_python_ast::{str::Quote, AnyStringKind};
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
|
@ -10,7 +10,7 @@ use crate::context::FStringState;
|
|||
use crate::options::PythonVersion;
|
||||
use crate::prelude::*;
|
||||
use crate::preview::is_f_string_formatting_enabled;
|
||||
use crate::string::{Quoting, StringPart, StringPrefix, StringQuotes};
|
||||
use crate::string::{Quoting, StringPart, StringQuotes};
|
||||
use crate::QuoteStyle;
|
||||
|
||||
pub(crate) struct StringNormalizer {
|
||||
|
@ -44,7 +44,7 @@ impl StringNormalizer {
|
|||
self
|
||||
}
|
||||
|
||||
fn quoting(&self, string: &StringPart) -> Quoting {
|
||||
fn quoting(&self, string: StringPart) -> Quoting {
|
||||
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
|
||||
// If we're inside an f-string, we need to make sure to preserve the
|
||||
// existing quotes unless we're inside a triple-quoted f-string and
|
||||
|
@ -60,7 +60,7 @@ impl StringNormalizer {
|
|||
// The reason to preserve the quotes is based on the assumption that
|
||||
// the original f-string is valid in terms of quoting, and we don't
|
||||
// want to change that to make it invalid.
|
||||
if (context.quotes().is_triple() && !string.quotes().is_triple())
|
||||
if (context.kind().is_triple_quoted() && !string.kind().is_triple_quoted())
|
||||
|| self.target_version.supports_pep_701()
|
||||
{
|
||||
self.quoting
|
||||
|
@ -73,18 +73,19 @@ impl StringNormalizer {
|
|||
}
|
||||
|
||||
/// Computes the string's preferred quotes.
|
||||
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
|
||||
pub(crate) fn choose_quotes(&self, string: StringPart, locator: &Locator) -> QuoteSelection {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
let first_quote_or_normalized_char_offset = raw_content
|
||||
.bytes()
|
||||
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
|
||||
let string_kind = string.kind();
|
||||
|
||||
let quotes = match self.quoting(string) {
|
||||
Quoting::Preserve => string.quotes(),
|
||||
let new_kind = match self.quoting(string) {
|
||||
Quoting::Preserve => string_kind,
|
||||
Quoting::CanChange => {
|
||||
// Per PEP 8, always prefer double quotes for triple-quoted strings.
|
||||
// Except when using quote-style-preserve.
|
||||
let preferred_style = if string.quotes().triple {
|
||||
let preferred_style = if string_kind.is_triple_quoted() {
|
||||
// ... unless we're formatting a code snippet inside a docstring,
|
||||
// then we specifically want to invert our quote style to avoid
|
||||
// writing out invalid Python.
|
||||
|
@ -145,33 +146,30 @@ impl StringNormalizer {
|
|||
if let Some(first_quote_or_normalized_char_offset) =
|
||||
first_quote_or_normalized_char_offset
|
||||
{
|
||||
if string.prefix().is_raw_string() {
|
||||
if string_kind.is_raw_string() {
|
||||
choose_quotes_for_raw_string(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
string_kind,
|
||||
preferred_quote,
|
||||
)
|
||||
} else {
|
||||
choose_quotes_impl(
|
||||
&raw_content[first_quote_or_normalized_char_offset..],
|
||||
string.quotes(),
|
||||
string_kind,
|
||||
preferred_quote,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
StringQuotes {
|
||||
quote_char: preferred_quote,
|
||||
triple: string.quotes().is_triple(),
|
||||
}
|
||||
string_kind.with_quote_style(preferred_quote)
|
||||
}
|
||||
} else {
|
||||
string.quotes()
|
||||
string_kind
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
QuoteSelection {
|
||||
quotes,
|
||||
kind: new_kind,
|
||||
first_quote_or_normalized_char_offset,
|
||||
}
|
||||
}
|
||||
|
@ -179,11 +177,10 @@ impl StringNormalizer {
|
|||
/// Computes the string's preferred quotes and normalizes its content.
|
||||
pub(crate) fn normalize<'a>(
|
||||
&self,
|
||||
string: &StringPart,
|
||||
string: StringPart,
|
||||
locator: &'a Locator,
|
||||
) -> NormalizedString<'a> {
|
||||
let raw_content = locator.slice(string.content_range());
|
||||
|
||||
let quote_selection = self.choose_quotes(string, locator);
|
||||
|
||||
let normalized = if let Some(first_quote_or_escape_offset) =
|
||||
|
@ -192,8 +189,7 @@ impl StringNormalizer {
|
|||
normalize_string(
|
||||
raw_content,
|
||||
first_quote_or_escape_offset,
|
||||
quote_selection.quotes,
|
||||
string.prefix(),
|
||||
quote_selection.kind,
|
||||
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
|
||||
// `format_fstring` preview style
|
||||
self.format_fstring,
|
||||
|
@ -203,34 +199,31 @@ impl StringNormalizer {
|
|||
};
|
||||
|
||||
NormalizedString {
|
||||
prefix: string.prefix(),
|
||||
kind: quote_selection.kind,
|
||||
content_range: string.content_range(),
|
||||
text: normalized,
|
||||
quotes: quote_selection.quotes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct QuoteSelection {
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
|
||||
first_quote_or_normalized_char_offset: Option<usize>,
|
||||
}
|
||||
|
||||
impl QuoteSelection {
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
pub(crate) fn kind(&self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct NormalizedString<'a> {
|
||||
prefix: crate::string::StringPrefix,
|
||||
|
||||
/// The quotes of the normalized string (preferred quotes)
|
||||
quotes: StringQuotes,
|
||||
/// Holds data about the quotes and prefix of the string
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// The range of the string's content in the source (minus prefix and quotes).
|
||||
content_range: TextRange,
|
||||
|
@ -244,12 +237,8 @@ impl<'a> NormalizedString<'a> {
|
|||
&self.text
|
||||
}
|
||||
|
||||
pub(crate) fn quotes(&self) -> StringQuotes {
|
||||
self.quotes
|
||||
}
|
||||
|
||||
pub(crate) fn prefix(&self) -> StringPrefix {
|
||||
self.prefix
|
||||
pub(crate) fn kind(&self) -> AnyStringKind {
|
||||
self.kind
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -261,7 +250,8 @@ impl Ranged for NormalizedString<'_> {
|
|||
|
||||
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
||||
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
|
||||
ruff_formatter::write!(f, [self.prefix, self.quotes])?;
|
||||
let quotes = StringQuotes::from(self.kind);
|
||||
ruff_formatter::write!(f, [self.kind.prefix(), quotes])?;
|
||||
match &self.text {
|
||||
Cow::Borrowed(_) => {
|
||||
source_text_slice(self.range()).fmt(f)?;
|
||||
|
@ -270,7 +260,7 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
|||
text(normalized).fmt(f)?;
|
||||
}
|
||||
}
|
||||
self.quotes.fmt(f)
|
||||
quotes.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -281,9 +271,9 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
|
|||
/// style is double quotes.
|
||||
fn choose_quotes_for_raw_string(
|
||||
input: &str,
|
||||
quotes: StringQuotes,
|
||||
kind: AnyStringKind,
|
||||
preferred_quote: Quote,
|
||||
) -> StringQuotes {
|
||||
) -> AnyStringKind {
|
||||
let preferred_quote_char = preferred_quote.as_char();
|
||||
let mut chars = input.chars().peekable();
|
||||
let contains_unescaped_configured_quotes = loop {
|
||||
|
@ -294,7 +284,7 @@ fn choose_quotes_for_raw_string(
|
|||
}
|
||||
// `"` or `'`
|
||||
Some(c) if c == preferred_quote_char => {
|
||||
if !quotes.triple {
|
||||
if !kind.is_triple_quoted() {
|
||||
break true;
|
||||
}
|
||||
|
||||
|
@ -319,14 +309,10 @@ fn choose_quotes_for_raw_string(
|
|||
None => break false,
|
||||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: if contains_unescaped_configured_quotes {
|
||||
quotes.quote_char
|
||||
} else {
|
||||
preferred_quote
|
||||
},
|
||||
if contains_unescaped_configured_quotes {
|
||||
kind
|
||||
} else {
|
||||
kind.with_quote_style(preferred_quote)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -338,8 +324,8 @@ fn choose_quotes_for_raw_string(
|
|||
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
|
||||
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
|
||||
/// used unless the string contains `"""`).
|
||||
fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote) -> StringQuotes {
|
||||
let quote = if quotes.triple {
|
||||
fn choose_quotes_impl(input: &str, kind: AnyStringKind, preferred_quote: Quote) -> AnyStringKind {
|
||||
let quote = if kind.is_triple_quoted() {
|
||||
// True if the string contains a triple quote sequence of the configured quote style.
|
||||
let mut uses_triple_quotes = false;
|
||||
let mut chars = input.chars().peekable();
|
||||
|
@ -393,7 +379,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
|||
if uses_triple_quotes {
|
||||
// String contains a triple quote sequence of the configured quote style.
|
||||
// Keep the existing quote style.
|
||||
quotes.quote_char
|
||||
kind.quote_style()
|
||||
} else {
|
||||
preferred_quote
|
||||
}
|
||||
|
@ -433,10 +419,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
|||
}
|
||||
};
|
||||
|
||||
StringQuotes {
|
||||
triple: quotes.triple,
|
||||
quote_char: quote,
|
||||
}
|
||||
kind.with_quote_style(quote)
|
||||
}
|
||||
|
||||
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
|
||||
|
@ -446,8 +429,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
|
|||
pub(crate) fn normalize_string(
|
||||
input: &str,
|
||||
start_offset: usize,
|
||||
quotes: StringQuotes,
|
||||
prefix: StringPrefix,
|
||||
kind: AnyStringKind,
|
||||
format_fstring: bool,
|
||||
) -> Cow<str> {
|
||||
// The normalized string if `input` is not yet normalized.
|
||||
|
@ -457,14 +439,14 @@ pub(crate) fn normalize_string(
|
|||
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
|
||||
let mut last_index = 0;
|
||||
|
||||
let quote = quotes.quote_char;
|
||||
let quote = kind.quote_style();
|
||||
let preferred_quote = quote.as_char();
|
||||
let opposite_quote = quote.opposite().as_char();
|
||||
|
||||
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
|
||||
|
||||
let is_raw = prefix.is_raw_string();
|
||||
let is_fstring = !format_fstring && prefix.is_fstring();
|
||||
let is_raw = kind.is_raw_string();
|
||||
let is_fstring = !format_fstring && kind.is_f_string();
|
||||
let mut formatted_value_nesting = 0u32;
|
||||
|
||||
while let Some((index, c)) = chars.next() {
|
||||
|
@ -502,7 +484,7 @@ pub(crate) fn normalize_string(
|
|||
} else {
|
||||
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
|
||||
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
|
||||
if let Some(normalised) = UnicodeEscape::new(next, !kind.is_byte_string())
|
||||
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
|
||||
{
|
||||
let escape_start_offset = index + escape_start_len;
|
||||
|
@ -521,7 +503,7 @@ pub(crate) fn normalize_string(
|
|||
}
|
||||
}
|
||||
|
||||
if !quotes.triple {
|
||||
if !kind.is_triple_quoted() {
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if next == opposite_quote && formatted_value_nesting == 0 {
|
||||
// Remove the escape by ending before the backslash and starting again with the quote
|
||||
|
@ -534,7 +516,10 @@ pub(crate) fn normalize_string(
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
|
||||
} else if !kind.is_triple_quoted()
|
||||
&& c == preferred_quote
|
||||
&& formatted_value_nesting == 0
|
||||
{
|
||||
// Escape the quote
|
||||
output.push_str(&input[last_index..index]);
|
||||
output.push('\\');
|
||||
|
@ -704,9 +689,7 @@ impl UnicodeEscape {
|
|||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
|
||||
use ruff_python_ast::str::Quote;
|
||||
|
||||
use crate::string::{StringPrefix, StringQuotes};
|
||||
use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix, ByteStringPrefix};
|
||||
|
||||
use super::{normalize_string, UnicodeEscape};
|
||||
|
||||
|
@ -727,11 +710,11 @@ mod tests {
|
|||
let normalized = normalize_string(
|
||||
input,
|
||||
0,
|
||||
StringQuotes {
|
||||
triple: false,
|
||||
quote_char: Quote::Double,
|
||||
},
|
||||
StringPrefix::BYTE,
|
||||
AnyStringKind::new(
|
||||
AnyStringPrefix::Bytes(ByteStringPrefix::Regular),
|
||||
Quote::Double,
|
||||
false,
|
||||
),
|
||||
true,
|
||||
);
|
||||
|
||||
|
|
|
@ -34,19 +34,15 @@ use std::{char, cmp::Ordering, str::FromStr};
|
|||
use unicode_ident::{is_xid_continue, is_xid_start};
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
use ruff_python_ast::{FStringPrefix, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{
|
||||
str::Quote, AnyStringKind, AnyStringPrefix, FStringPrefix, Int, IpyEscapeKind,
|
||||
};
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::cursor::{Cursor, EOF_CHAR};
|
||||
use crate::lexer::fstring::{FStringContext, FStrings};
|
||||
use crate::lexer::indentation::{Indentation, Indentations};
|
||||
use crate::{
|
||||
soft_keywords::SoftKeywordTransformer,
|
||||
string::FStringErrorType,
|
||||
string_token_flags::{StringKind, StringPrefix},
|
||||
token::Tok,
|
||||
Mode,
|
||||
};
|
||||
use crate::{soft_keywords::SoftKeywordTransformer, string::FStringErrorType, token::Tok, Mode};
|
||||
|
||||
mod cursor;
|
||||
mod fstring;
|
||||
|
@ -188,14 +184,14 @@ impl<'source> Lexer<'source> {
|
|||
return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true }));
|
||||
}
|
||||
(_, quote @ ('\'' | '"')) => {
|
||||
if let Ok(prefix) = StringPrefix::try_from(first) {
|
||||
if let Ok(prefix) = AnyStringPrefix::try_from(first) {
|
||||
self.cursor.bump();
|
||||
return self.lex_string(prefix, quote);
|
||||
}
|
||||
}
|
||||
(_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
|
||||
self.cursor.bump();
|
||||
if let Ok(prefix) = StringPrefix::try_from([first, second]) {
|
||||
if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) {
|
||||
let quote = self.cursor.bump().unwrap();
|
||||
return self.lex_string(prefix, quote);
|
||||
}
|
||||
|
@ -560,11 +556,14 @@ impl<'source> Lexer<'source> {
|
|||
#[cfg(debug_assertions)]
|
||||
debug_assert_eq!(self.cursor.previous(), quote);
|
||||
|
||||
let mut kind = StringKind::from_prefix(StringPrefix::Format(prefix));
|
||||
let mut kind = AnyStringKind::default()
|
||||
.with_prefix(AnyStringPrefix::Format(prefix))
|
||||
.with_quote_style(if quote == '"' {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
});
|
||||
|
||||
if quote == '"' {
|
||||
kind = kind.with_double_quotes();
|
||||
}
|
||||
if self.cursor.eat_char2(quote, quote) {
|
||||
kind = kind.with_triple_quotes();
|
||||
}
|
||||
|
@ -708,15 +707,17 @@ impl<'source> Lexer<'source> {
|
|||
}
|
||||
|
||||
/// Lex a string literal.
|
||||
fn lex_string(&mut self, prefix: StringPrefix, quote: char) -> Result<Tok, LexicalError> {
|
||||
fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> Result<Tok, LexicalError> {
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert_eq!(self.cursor.previous(), quote);
|
||||
|
||||
let mut kind = StringKind::from_prefix(prefix);
|
||||
|
||||
if quote == '"' {
|
||||
kind = kind.with_double_quotes();
|
||||
}
|
||||
let mut kind = AnyStringKind::default()
|
||||
.with_prefix(prefix)
|
||||
.with_quote_style(if quote == '"' {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
});
|
||||
|
||||
// If the next two characters are also the quote character, then we have a triple-quoted
|
||||
// string; consume those two characters and ensure that we require a triple-quote to close
|
||||
|
@ -1082,7 +1083,7 @@ impl<'source> Lexer<'source> {
|
|||
c if is_ascii_identifier_start(c) => self.lex_identifier(c)?,
|
||||
'0'..='9' => self.lex_number(c)?,
|
||||
'#' => return Ok((self.lex_comment(), self.token_range())),
|
||||
'\'' | '"' => self.lex_string(StringPrefix::default(), c)?,
|
||||
'\'' | '"' => self.lex_string(AnyStringPrefix::default(), c)?,
|
||||
'=' => {
|
||||
if self.cursor.eat_char('=') {
|
||||
Tok::EqEqual
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use crate::string_token_flags::StringKind;
|
||||
use ruff_python_ast::AnyStringKind;
|
||||
|
||||
/// The context representing the current f-string that the lexer is in.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct FStringContext {
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
|
||||
/// The level of nesting for the lexer when it entered the current f-string.
|
||||
/// The nesting level includes all kinds of parentheses i.e., round, square,
|
||||
|
@ -17,7 +17,7 @@ pub(crate) struct FStringContext {
|
|||
}
|
||||
|
||||
impl FStringContext {
|
||||
pub(crate) const fn new(kind: StringKind, nesting: u32) -> Self {
|
||||
pub(crate) const fn new(kind: AnyStringKind, nesting: u32) -> Self {
|
||||
debug_assert!(kind.is_f_string());
|
||||
Self {
|
||||
kind,
|
||||
|
@ -26,7 +26,7 @@ impl FStringContext {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn kind(&self) -> StringKind {
|
||||
pub(crate) const fn kind(&self) -> AnyStringKind {
|
||||
debug_assert!(self.kind.is_f_string());
|
||||
self.kind
|
||||
}
|
||||
|
|
|
@ -115,7 +115,6 @@ pub use parser::{
|
|||
};
|
||||
use ruff_python_ast::{Mod, PySourceType, Suite};
|
||||
pub use string::FStringErrorType;
|
||||
pub use string_token_flags::StringKind;
|
||||
pub use token::{Tok, TokenKind};
|
||||
|
||||
use crate::lexer::LexResult;
|
||||
|
@ -128,7 +127,6 @@ pub mod lexer;
|
|||
mod parser;
|
||||
mod soft_keywords;
|
||||
mod string;
|
||||
mod string_token_flags;
|
||||
mod token;
|
||||
mod token_source;
|
||||
pub mod typing;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
// See also: https://greentreesnakes.readthedocs.io/en/latest/nodes.html#keyword
|
||||
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
|
||||
use crate::{
|
||||
FStringErrorType,
|
||||
Mode,
|
||||
|
@ -12,7 +12,6 @@ use crate::{
|
|||
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
|
||||
context::set_context,
|
||||
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
|
||||
string_token_flags::StringKind,
|
||||
token,
|
||||
invalid,
|
||||
};
|
||||
|
@ -1983,7 +1982,7 @@ extern {
|
|||
Dedent => token::Tok::Dedent,
|
||||
StartModule => token::Tok::StartModule,
|
||||
StartExpression => token::Tok::StartExpression,
|
||||
fstring_start => token::Tok::FStringStart(<StringKind>),
|
||||
fstring_start => token::Tok::FStringStart(<AnyStringKind>),
|
||||
FStringEnd => token::Tok::FStringEnd,
|
||||
"!" => token::Tok::Exclamation,
|
||||
"?" => token::Tok::Question,
|
||||
|
@ -2076,11 +2075,11 @@ extern {
|
|||
complex => token::Tok::Complex { real: <f64>, imag: <f64> },
|
||||
string => token::Tok::String {
|
||||
value: <Box<str>>,
|
||||
kind: <StringKind>,
|
||||
kind: <AnyStringKind>,
|
||||
},
|
||||
fstring_middle => token::Tok::FStringMiddle {
|
||||
value: <Box<str>>,
|
||||
kind: <StringKind>,
|
||||
kind: <AnyStringKind>,
|
||||
},
|
||||
name => token::Tok::Name { name: <Box<str>> },
|
||||
ipy_escape_command => token::Tok::IpyEscapeCommand {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// auto-generated: "lalrpop 0.20.0"
|
||||
// sha3: c98876ae871e13c1a0cabf962138ded61584185a0c3144b626dac60f707ea396
|
||||
// sha3: 4ca26eae1233cf922ef88887715de0a4ca45076324249a20b87f095e9638165d
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
|
||||
use crate::{
|
||||
FStringErrorType,
|
||||
Mode,
|
||||
|
@ -9,7 +9,6 @@ use crate::{
|
|||
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
|
||||
context::set_context,
|
||||
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
|
||||
string_token_flags::StringKind,
|
||||
token,
|
||||
invalid,
|
||||
};
|
||||
|
@ -26,7 +25,7 @@ extern crate alloc;
|
|||
mod __parse__Top {
|
||||
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
|
||||
use crate::{
|
||||
FStringErrorType,
|
||||
Mode,
|
||||
|
@ -34,7 +33,6 @@ mod __parse__Top {
|
|||
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
|
||||
context::set_context,
|
||||
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
|
||||
string_token_flags::StringKind,
|
||||
token,
|
||||
invalid,
|
||||
};
|
||||
|
@ -52,8 +50,8 @@ mod __parse__Top {
|
|||
Variant0(token::Tok),
|
||||
Variant1((f64, f64)),
|
||||
Variant2(f64),
|
||||
Variant3((Box<str>, StringKind)),
|
||||
Variant4(StringKind),
|
||||
Variant3((Box<str>, AnyStringKind)),
|
||||
Variant4(AnyStringKind),
|
||||
Variant5(Int),
|
||||
Variant6((IpyEscapeKind, Box<str>)),
|
||||
Variant7(Box<str>),
|
||||
|
@ -151,7 +149,7 @@ mod __parse__Top {
|
|||
Variant99(ast::TypeParams),
|
||||
Variant100(core::option::Option<ast::TypeParams>),
|
||||
Variant101(ast::UnaryOp),
|
||||
Variant102(core::option::Option<(Box<str>, StringKind)>),
|
||||
Variant102(core::option::Option<(Box<str>, AnyStringKind)>),
|
||||
}
|
||||
const __ACTION: &[i16] = &[
|
||||
// State 0
|
||||
|
@ -18322,7 +18320,7 @@ mod __parse__Top {
|
|||
fn __pop_Variant3<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, (Box<str>, StringKind), TextSize)
|
||||
) -> (TextSize, (Box<str>, AnyStringKind), TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant3(__v), __r)) => (__l, __v, __r),
|
||||
|
@ -18479,6 +18477,16 @@ mod __parse__Top {
|
|||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant4<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, AnyStringKind, TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
|
||||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant7<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
|
@ -18509,16 +18517,6 @@ mod __parse__Top {
|
|||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant4<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, StringKind, TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
|
||||
_ => __symbol_type_mismatch()
|
||||
}
|
||||
}
|
||||
fn __pop_Variant67<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
|
@ -19102,7 +19100,7 @@ mod __parse__Top {
|
|||
fn __pop_Variant102<
|
||||
>(
|
||||
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
|
||||
) -> (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize)
|
||||
) -> (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize)
|
||||
{
|
||||
match __symbols.pop() {
|
||||
Some((__l, __Symbol::Variant102(__v), __r)) => (__l, __v, __r),
|
||||
|
@ -35724,7 +35722,7 @@ fn __action185<
|
|||
(_, parameters, _): (TextSize, core::option::Option<ast::Parameters>, TextSize),
|
||||
(_, end_location_args, _): (TextSize, TextSize, TextSize),
|
||||
(_, _, _): (TextSize, token::Tok, TextSize),
|
||||
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
(_, body, _): (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
|
@ -36179,7 +36177,7 @@ fn __action218<
|
|||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, location, _): (TextSize, TextSize, TextSize),
|
||||
(_, string, _): (TextSize, (Box<str>, StringKind), TextSize),
|
||||
(_, string, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -36196,7 +36194,7 @@ fn __action219<
|
|||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, location, _): (TextSize, TextSize, TextSize),
|
||||
(_, start, _): (TextSize, StringKind, TextSize),
|
||||
(_, start, _): (TextSize, AnyStringKind, TextSize),
|
||||
(_, elements, _): (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
(_, _, _): (TextSize, token::Tok, TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
|
@ -36230,7 +36228,7 @@ fn __action221<
|
|||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, location, _): (TextSize, TextSize, TextSize),
|
||||
(_, fstring_middle, _): (TextSize, (Box<str>, StringKind), TextSize),
|
||||
(_, fstring_middle, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
(_, end_location, _): (TextSize, TextSize, TextSize),
|
||||
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -37185,8 +37183,8 @@ fn __action282<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
(_, __0, _): (TextSize, (Box<str>, StringKind), TextSize),
|
||||
) -> core::option::Option<(Box<str>, StringKind)>
|
||||
(_, __0, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
) -> core::option::Option<(Box<str>, AnyStringKind)>
|
||||
{
|
||||
Some(__0)
|
||||
}
|
||||
|
@ -37199,7 +37197,7 @@ fn __action283<
|
|||
mode: Mode,
|
||||
__lookbehind: &TextSize,
|
||||
__lookahead: &TextSize,
|
||||
) -> core::option::Option<(Box<str>, StringKind)>
|
||||
) -> core::option::Option<(Box<str>, AnyStringKind)>
|
||||
{
|
||||
None
|
||||
}
|
||||
|
@ -47957,7 +47955,7 @@ fn __action791<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, TextSize, TextSize),
|
||||
|
@ -48017,7 +48015,7 @@ fn __action793<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__1: (TextSize, TextSize, TextSize),
|
||||
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -49121,7 +49119,7 @@ fn __action828<
|
|||
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
|
||||
__2: (TextSize, TextSize, TextSize),
|
||||
__3: (TextSize, token::Tok, TextSize),
|
||||
__4: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__4: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__5: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
__6: (TextSize, TextSize, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
|
@ -52139,7 +52137,7 @@ fn __action924<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__1: (TextSize, TextSize, TextSize),
|
||||
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -63911,7 +63909,7 @@ fn __action1304<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
) -> StringType
|
||||
|
@ -63967,7 +63965,7 @@ fn __action1306<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
let __start0 = __0.2;
|
||||
|
@ -64870,7 +64868,7 @@ fn __action1338<
|
|||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -69379,7 +69377,7 @@ fn __action1485<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
let __start0 = __0.2;
|
||||
|
@ -72279,7 +72277,7 @@ fn __action1578<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, token::Tok, TextSize),
|
||||
) -> StringType
|
||||
{
|
||||
|
@ -72307,7 +72305,7 @@ fn __action1579<
|
|||
>(
|
||||
source_code: &str,
|
||||
mode: Mode,
|
||||
__0: (TextSize, StringKind, TextSize),
|
||||
__0: (TextSize, AnyStringKind, TextSize),
|
||||
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
) -> StringType
|
||||
|
@ -76896,7 +76894,7 @@ fn __action1716<
|
|||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, ast::Parameters, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -76927,7 +76925,7 @@ fn __action1717<
|
|||
mode: Mode,
|
||||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, token::Tok, TextSize),
|
||||
__2: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
|
||||
__2: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
|
||||
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -78832,7 +78830,7 @@ fn __action1774<
|
|||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, ast::Parameters, TextSize),
|
||||
__2: (TextSize, token::Tok, TextSize),
|
||||
__3: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__3: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
@ -78895,7 +78893,7 @@ fn __action1776<
|
|||
mode: Mode,
|
||||
__0: (TextSize, token::Tok, TextSize),
|
||||
__1: (TextSize, token::Tok, TextSize),
|
||||
__2: (TextSize, (Box<str>, StringKind), TextSize),
|
||||
__2: (TextSize, (Box<str>, AnyStringKind), TextSize),
|
||||
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
|
||||
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
|
||||
{
|
||||
|
|
|
@ -2,11 +2,10 @@
|
|||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use ruff_python_ast::{self as ast, Expr};
|
||||
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
||||
use crate::string_token_flags::StringKind;
|
||||
use crate::token::Tok;
|
||||
|
||||
pub(crate) enum StringType {
|
||||
|
@ -43,13 +42,13 @@ enum EscapedChar {
|
|||
struct StringParser {
|
||||
source: Box<str>,
|
||||
cursor: usize,
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
offset: TextSize,
|
||||
range: TextRange,
|
||||
}
|
||||
|
||||
impl StringParser {
|
||||
fn new(source: Box<str>, kind: StringKind, offset: TextSize, range: TextRange) -> Self {
|
||||
fn new(source: Box<str>, kind: AnyStringKind, offset: TextSize, range: TextRange) -> Self {
|
||||
Self {
|
||||
source,
|
||||
cursor: 0,
|
||||
|
@ -425,7 +424,7 @@ impl StringParser {
|
|||
|
||||
pub(crate) fn parse_string_literal(
|
||||
source: Box<str>,
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
range: TextRange,
|
||||
) -> Result<StringType, LexicalError> {
|
||||
StringParser::new(source, kind, range.start() + kind.opener_len(), range).parse()
|
||||
|
@ -433,7 +432,7 @@ pub(crate) fn parse_string_literal(
|
|||
|
||||
pub(crate) fn parse_fstring_literal_element(
|
||||
source: Box<str>,
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
range: TextRange,
|
||||
) -> Result<ast::FStringElement, LexicalError> {
|
||||
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()
|
||||
|
|
|
@ -1,395 +0,0 @@
|
|||
use std::fmt;
|
||||
|
||||
use bitflags::bitflags;
|
||||
|
||||
use ruff_python_ast::{str::Quote, ByteStringPrefix, FStringPrefix, StringLiteralPrefix};
|
||||
use ruff_text_size::{TextLen, TextSize};
|
||||
|
||||
bitflags! {
    /// Bit-level description of a string literal's opener: which quote
    /// character delimits it, whether it is triple-quoted, and which prefix
    /// characters precede the quotes.
    ///
    /// Some bit combinations are meaningless -- for instance `U_PREFIX`
    /// must never be paired with another `*_PREFIX` bit. To stay within the
    /// valid combinations, obtain the prefix bits from the `as_flags()`
    /// method of the `StringPrefix` enum instead of assembling them by hand.
    #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
    struct StringFlags: u8 {
        /// Set when the string is delimited by double quotes (`"`).
        /// When unset, the string is delimited by single quotes (`'`).
        const DOUBLE = 0b0000_0001;

        /// Set when the string is triple-quoted, i.e. it opens and closes
        /// with three consecutive quote characters.
        const TRIPLE_QUOTED = 0b0000_0010;

        /// Set for a `u`/`U` prefix.
        /// The prefix has no runtime effect, but a string carrying it
        /// cannot carry any other prefix.
        const U_PREFIX = 0b0000_0100;

        /// Set for a `b`/`B` prefix.
        /// At runtime such a string is a sequence of `int`s rather than a
        /// sequence of `str`s. It may additionally be raw, but may not
        /// carry any other prefix.
        const B_PREFIX = 0b0000_1000;

        /// Set for an `f`/`F` prefix, marking the literal as an f-string.
        /// An f-string may additionally be raw, but may not carry any
        /// other prefix.
        const F_PREFIX = 0b0001_0000;

        /// Set for a lowercase `r` prefix, marking the literal as raw.
        /// Raw applies to f-strings, byte-strings and otherwise unprefixed
        /// strings; a u-string cannot be raw.
        const R_PREFIX_LOWER = 0b0010_0000;

        /// Set for an uppercase `R` prefix, marking the literal as raw.
        /// The case of `r`/`R` makes no difference at runtime. See
        /// <https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings>
        /// for the reason the case is recorded for this prefix only.
        const R_PREFIX_UPPER = 0b0100_0000;
    }
}
|
||||
|
||||
/// Enumeration of all the possible valid prefixes
|
||||
/// prior to a Python string literal.
|
||||
///
|
||||
/// Using the `as_flags()` method on variants of this enum
|
||||
/// is the recommended way to set `*_PREFIX` flags from the
|
||||
/// `StringFlags` bitflag, as it means that you cannot accidentally
|
||||
/// set a combination of `*_PREFIX` flags that would be invalid
|
||||
/// at runtime in Python.
|
||||
///
|
||||
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
|
||||
/// [PEP 701]: https://peps.python.org/pep-0701/
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum StringPrefix {
|
||||
/// Prefixes that indicate the string is a bytestring
|
||||
Bytes(ByteStringPrefix),
|
||||
|
||||
/// Prefixes that indicate the string is an f-string
|
||||
Format(FStringPrefix),
|
||||
|
||||
/// All other prefixes
|
||||
Regular(StringLiteralPrefix),
|
||||
}
|
||||
|
||||
impl TryFrom<char> for StringPrefix {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(value: char) -> Result<Self, String> {
|
||||
let result = match value {
|
||||
'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }),
|
||||
'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }),
|
||||
'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode),
|
||||
'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular),
|
||||
'f' | 'F' => Self::Format(FStringPrefix::Regular),
|
||||
_ => return Err(format!("Unexpected prefix '{value}'")),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<[char; 2]> for StringPrefix {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(value: [char; 2]) -> Result<Self, String> {
|
||||
let result = match value {
|
||||
['r', 'f' | 'F'] | ['f' | 'F', 'r'] => {
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: false })
|
||||
}
|
||||
['R', 'f' | 'F'] | ['f' | 'F', 'R'] => {
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: true })
|
||||
}
|
||||
['r', 'b' | 'B'] | ['b' | 'B', 'r'] => {
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false })
|
||||
}
|
||||
['R', 'b' | 'B'] | ['b' | 'B', 'R'] => {
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true })
|
||||
}
|
||||
_ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl StringPrefix {
|
||||
const fn as_flags(self) -> StringFlags {
|
||||
match self {
|
||||
// regular strings
|
||||
Self::Regular(StringLiteralPrefix::Empty) => StringFlags::empty(),
|
||||
Self::Regular(StringLiteralPrefix::Unicode) => StringFlags::U_PREFIX,
|
||||
Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
|
||||
StringFlags::R_PREFIX_LOWER
|
||||
}
|
||||
Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
|
||||
StringFlags::R_PREFIX_UPPER
|
||||
}
|
||||
|
||||
// bytestrings
|
||||
Self::Bytes(ByteStringPrefix::Regular) => StringFlags::B_PREFIX,
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
|
||||
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_LOWER)
|
||||
}
|
||||
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
|
||||
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_UPPER)
|
||||
}
|
||||
|
||||
// f-strings
|
||||
Self::Format(FStringPrefix::Regular) => StringFlags::F_PREFIX,
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: false }) => {
|
||||
StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_LOWER)
|
||||
}
|
||||
Self::Format(FStringPrefix::Raw { uppercase_r: true }) => {
|
||||
StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_UPPER)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fn from_kind(kind: StringKind) -> Self {
|
||||
let StringKind(flags) = kind;
|
||||
|
||||
// f-strings
|
||||
if flags.contains(StringFlags::F_PREFIX) {
|
||||
if flags.contains(StringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Format(FStringPrefix::Raw { uppercase_r: false });
|
||||
}
|
||||
if flags.contains(StringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Format(FStringPrefix::Raw { uppercase_r: true });
|
||||
}
|
||||
return Self::Format(FStringPrefix::Regular);
|
||||
}
|
||||
|
||||
// bytestrings
|
||||
if flags.contains(StringFlags::B_PREFIX) {
|
||||
if flags.contains(StringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
|
||||
}
|
||||
if flags.contains(StringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
|
||||
}
|
||||
return Self::Bytes(ByteStringPrefix::Regular);
|
||||
}
|
||||
|
||||
// all other strings
|
||||
if flags.contains(StringFlags::R_PREFIX_LOWER) {
|
||||
return Self::Regular(StringLiteralPrefix::Raw { uppercase: false });
|
||||
}
|
||||
if flags.contains(StringFlags::R_PREFIX_UPPER) {
|
||||
return Self::Regular(StringLiteralPrefix::Raw { uppercase: true });
|
||||
}
|
||||
if flags.contains(StringFlags::U_PREFIX) {
|
||||
return Self::Regular(StringLiteralPrefix::Unicode);
|
||||
}
|
||||
Self::Regular(StringLiteralPrefix::Empty)
|
||||
}
|
||||
|
||||
const fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Regular(regular_prefix) => regular_prefix.as_str(),
|
||||
Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(),
|
||||
Self::Format(fstring_prefix) => fstring_prefix.as_str(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for StringPrefix {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for StringPrefix {
|
||||
fn default() -> Self {
|
||||
Self::Regular(StringLiteralPrefix::Empty)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct StringKind(StringFlags);
|
||||
|
||||
impl StringKind {
|
||||
pub(crate) const fn from_prefix(prefix: StringPrefix) -> Self {
|
||||
Self(prefix.as_flags())
|
||||
}
|
||||
|
||||
pub const fn prefix(self) -> StringPrefix {
|
||||
StringPrefix::from_kind(self)
|
||||
}
|
||||
|
||||
/// Does the string have a `u` or `U` prefix?
|
||||
pub const fn is_u_string(self) -> bool {
|
||||
self.0.contains(StringFlags::U_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string have an `r` or `R` prefix?
|
||||
pub const fn is_raw_string(self) -> bool {
|
||||
self.0
|
||||
.intersects(StringFlags::R_PREFIX_LOWER.union(StringFlags::R_PREFIX_UPPER))
|
||||
}
|
||||
|
||||
/// Does the string have an `f` or `F` prefix?
|
||||
pub const fn is_f_string(self) -> bool {
|
||||
self.0.contains(StringFlags::F_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string have a `b` or `B` prefix?
|
||||
pub const fn is_byte_string(self) -> bool {
|
||||
self.0.contains(StringFlags::B_PREFIX)
|
||||
}
|
||||
|
||||
/// Does the string use single or double quotes in its opener and closer?
|
||||
pub const fn quote_style(self) -> Quote {
|
||||
if self.0.contains(StringFlags::DOUBLE) {
|
||||
Quote::Double
|
||||
} else {
|
||||
Quote::Single
|
||||
}
|
||||
}
|
||||
|
||||
/// Is the string triple-quoted, i.e.,
|
||||
/// does it begin and end with three consecutive quote characters?
|
||||
pub const fn is_triple_quoted(self) -> bool {
|
||||
self.0.contains(StringFlags::TRIPLE_QUOTED)
|
||||
}
|
||||
|
||||
/// A `str` representation of the quotes used to start and close.
|
||||
/// This does not include any prefixes the string has in its opener.
|
||||
pub const fn quote_str(self) -> &'static str {
|
||||
if self.is_triple_quoted() {
|
||||
match self.quote_style() {
|
||||
Quote::Single => "'''",
|
||||
Quote::Double => r#"""""#,
|
||||
}
|
||||
} else {
|
||||
match self.quote_style() {
|
||||
Quote::Single => "'",
|
||||
Quote::Double => "\"",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The length of the prefixes used (if any) in the string's opener.
|
||||
pub fn prefix_len(self) -> TextSize {
|
||||
self.prefix().as_str().text_len()
|
||||
}
|
||||
|
||||
/// The length of the quotes used to start and close the string.
|
||||
/// This does not include the length of any prefixes the string has
|
||||
/// in its opener.
|
||||
pub const fn quote_len(self) -> TextSize {
|
||||
if self.is_triple_quoted() {
|
||||
TextSize::new(3)
|
||||
} else {
|
||||
TextSize::new(1)
|
||||
}
|
||||
}
|
||||
|
||||
/// The total length of the string's opener,
|
||||
/// i.e., the length of the prefixes plus the length
|
||||
/// of the quotes used to open the string.
|
||||
pub fn opener_len(self) -> TextSize {
|
||||
self.prefix_len() + self.quote_len()
|
||||
}
|
||||
|
||||
/// The total length of the string's closer.
|
||||
/// This is always equal to `self.quote_len()`,
|
||||
/// but is provided here for symmetry with the `opener_len()` method.
|
||||
pub const fn closer_len(self) -> TextSize {
|
||||
self.quote_len()
|
||||
}
|
||||
|
||||
pub fn format_string_contents(self, contents: &str) -> String {
|
||||
format!(
|
||||
"{}{}{}{}",
|
||||
self.prefix(),
|
||||
self.quote_str(),
|
||||
contents,
|
||||
self.quote_str()
|
||||
)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_double_quotes(mut self) -> Self {
|
||||
self.0 |= StringFlags::DOUBLE;
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_triple_quotes(mut self) -> Self {
|
||||
self.0 |= StringFlags::TRIPLE_QUOTED;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for StringKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("StringKind")
|
||||
.field("prefix", &self.prefix())
|
||||
.field("triple_quoted", &self.is_triple_quoted())
|
||||
.field("quote_style", &self.quote_style())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringKind> for ruff_python_ast::StringLiteralFlags {
|
||||
fn from(value: StringKind) -> ruff_python_ast::StringLiteralFlags {
|
||||
let mut new = ruff_python_ast::StringLiteralFlags::default();
|
||||
if value.quote_style().is_double() {
|
||||
new = new.with_double_quotes();
|
||||
}
|
||||
if value.is_triple_quoted() {
|
||||
new = new.with_triple_quotes();
|
||||
}
|
||||
let StringPrefix::Regular(prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into a regular string",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
new.with_prefix(prefix)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringKind> for ruff_python_ast::BytesLiteralFlags {
|
||||
fn from(value: StringKind) -> ruff_python_ast::BytesLiteralFlags {
|
||||
let mut new = ruff_python_ast::BytesLiteralFlags::default();
|
||||
if value.quote_style().is_double() {
|
||||
new = new.with_double_quotes();
|
||||
}
|
||||
if value.is_triple_quoted() {
|
||||
new = new.with_triple_quotes();
|
||||
}
|
||||
let StringPrefix::Bytes(bytestring_prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into a bytestring",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
new.with_prefix(bytestring_prefix)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringKind> for ruff_python_ast::FStringFlags {
|
||||
fn from(value: StringKind) -> ruff_python_ast::FStringFlags {
|
||||
let mut new = ruff_python_ast::FStringFlags::default();
|
||||
if value.quote_style().is_double() {
|
||||
new = new.with_double_quotes();
|
||||
}
|
||||
if value.is_triple_quoted() {
|
||||
new = new.with_triple_quotes();
|
||||
}
|
||||
let StringPrefix::Format(fstring_prefix) = value.prefix() else {
|
||||
unreachable!(
|
||||
"Should never attempt to convert {} into an f-string",
|
||||
value.prefix()
|
||||
)
|
||||
};
|
||||
new.with_prefix(fstring_prefix)
|
||||
}
|
||||
}
|
|
@ -4,10 +4,9 @@
|
|||
//! loosely based on the token definitions found in the [CPython source].
|
||||
//!
|
||||
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h;
|
||||
use crate::string_token_flags::StringKind;
|
||||
use crate::Mode;
|
||||
|
||||
use ruff_python_ast::{Int, IpyEscapeKind};
|
||||
use ruff_python_ast::{AnyStringKind, Int, IpyEscapeKind};
|
||||
use std::fmt;
|
||||
|
||||
/// The set of tokens the Python source code can be tokenized in.
|
||||
|
@ -44,11 +43,11 @@ pub enum Tok {
|
|||
value: Box<str>,
|
||||
/// Flags that can be queried to determine the quote style
|
||||
/// and prefixes of the string
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
},
|
||||
/// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix
|
||||
/// and the opening quote(s).
|
||||
FStringStart(StringKind),
|
||||
FStringStart(AnyStringKind),
|
||||
/// Token value that includes the portion of text inside the f-string that's not
|
||||
/// part of the expression part and isn't an opening or closing brace.
|
||||
FStringMiddle {
|
||||
|
@ -56,7 +55,7 @@ pub enum Tok {
|
|||
value: Box<str>,
|
||||
/// Flags that can be queried to determine the quote style
|
||||
/// and prefixes of the string
|
||||
kind: StringKind,
|
||||
kind: AnyStringKind,
|
||||
},
|
||||
/// Token value for the end of an f-string. This includes the closing quote.
|
||||
FStringEnd,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue