Simplify formatting of strings by using flags from the AST nodes (#10489)

This commit is contained in:
Alex Waygood 2024-03-20 16:16:54 +00:00 committed by GitHub
parent fc792d1d2e
commit 7caf0d064a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 725 additions and 809 deletions

1
Cargo.lock generated
View file

@ -2251,7 +2251,6 @@ name = "ruff_python_formatter"
version = "0.0.0"
dependencies = [
"anyhow",
"bitflags 2.4.2",
"clap",
"countme",
"insta",

View file

@ -2,14 +2,14 @@ use std::str::FromStr;
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::Expr;
use ruff_python_ast::{AnyStringKind, Expr};
use ruff_python_literal::{
cformat::{CFormatErrorType, CFormatString},
format::FormatPart,
format::FromTemplate,
format::{FormatSpec, FormatSpecError, FormatString},
};
use ruff_python_parser::{lexer, Mode, StringKind, Tok};
use ruff_python_parser::{lexer, Mode, Tok};
use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::Checker;
@ -92,7 +92,7 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
/// Ex) `"%z" % "1"`
pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
// Grab each string segment (in case there's an implicit concatenation).
let mut strings: Vec<(TextRange, StringKind)> = vec![];
let mut strings: Vec<(TextRange, AnyStringKind)> = vec![];
for (tok, range) in
lexer::lex_starts_at(checker.locator().slice(expr), Mode::Module, expr.start()).flatten()
{

View file

@ -1,8 +1,8 @@
use std::str::FromStr;
use ruff_python_ast::{self as ast, Expr};
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
use ruff_python_literal::cformat::{CFormatPart, CFormatSpec, CFormatStrOrBytes, CFormatString};
use ruff_python_parser::{lexer, AsMode, StringKind, Tok};
use ruff_python_parser::{lexer, AsMode, Tok};
use ruff_text_size::{Ranged, TextRange};
use rustc_hash::FxHashMap;
@ -218,7 +218,7 @@ fn is_valid_dict(
pub(crate) fn bad_string_format_type(checker: &mut Checker, expr: &Expr, right: &Expr) {
// Grab each string segment (in case there's an implicit concatenation).
let content = checker.locator().slice(expr);
let mut strings: Vec<(TextRange, StringKind)> = vec![];
let mut strings: Vec<(TextRange, AnyStringKind)> = vec![];
for (tok, range) in
lexer::lex_starts_at(content, checker.source_type.as_mode(), expr.start()).flatten()
{

View file

@ -4,12 +4,12 @@ use std::str::FromStr;
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::whitespace::indentation;
use ruff_python_ast::{self as ast, Expr};
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
use ruff_python_codegen::Stylist;
use ruff_python_literal::cformat::{
CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString,
};
use ruff_python_parser::{lexer, AsMode, StringKind, Tok};
use ruff_python_parser::{lexer, AsMode, Tok};
use ruff_python_stdlib::identifiers::is_identifier;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
@ -352,7 +352,7 @@ fn convertible(format_string: &CFormatString, params: &Expr) -> bool {
/// UP031
pub(crate) fn printf_string_formatting(checker: &mut Checker, expr: &Expr, right: &Expr) {
// Grab each string segment (in case there's an implicit concatenation).
let mut strings: Vec<(TextRange, StringKind)> = vec![];
let mut strings: Vec<(TextRange, AnyStringKind)> = vec![];
let mut extension = None;
for (tok, range) in lexer::lex_starts_at(
checker.locator().slice(expr),

View file

@ -9,7 +9,7 @@ use std::slice::{Iter, IterMut};
use bitflags::bitflags;
use itertools::Itertools;
use ruff_text_size::{Ranged, TextRange, TextSize};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::{int, str::Quote, LiteralExpressionRef};
@ -1244,8 +1244,9 @@ pub struct FStringFlags(FStringFlagsInner);
impl FStringFlags {
#[must_use]
pub fn with_double_quotes(mut self) -> Self {
self.0 |= FStringFlagsInner::DOUBLE;
pub fn with_quote_style(mut self, quote_style: Quote) -> Self {
self.0
.set(FStringFlagsInner::DOUBLE, quote_style.is_double());
self
}
@ -1256,18 +1257,16 @@ impl FStringFlags {
}
#[must_use]
pub fn with_prefix(self, prefix: FStringPrefix) -> Self {
let FStringFlags(flags) = self;
pub fn with_prefix(mut self, prefix: FStringPrefix) -> Self {
match prefix {
FStringPrefix::Regular => {
Self(flags - FStringFlagsInner::R_PREFIX_LOWER - FStringFlagsInner::R_PREFIX_UPPER)
Self(self.0 - FStringFlagsInner::R_PREFIX_LOWER - FStringFlagsInner::R_PREFIX_UPPER)
}
FStringPrefix::Raw { uppercase_r } => {
self.0.set(FStringFlagsInner::R_PREFIX_UPPER, uppercase_r);
self.0.set(FStringFlagsInner::R_PREFIX_LOWER, !uppercase_r);
self
}
FStringPrefix::Raw { uppercase_r: true } => Self(
(flags | FStringFlagsInner::R_PREFIX_UPPER) - FStringFlagsInner::R_PREFIX_LOWER,
),
FStringPrefix::Raw { uppercase_r: false } => Self(
(flags | FStringFlagsInner::R_PREFIX_LOWER) - FStringFlagsInner::R_PREFIX_UPPER,
),
}
}
@ -1582,8 +1581,9 @@ pub struct StringLiteralFlags(StringLiteralFlagsInner);
impl StringLiteralFlags {
#[must_use]
pub fn with_double_quotes(mut self) -> Self {
self.0 |= StringLiteralFlagsInner::DOUBLE;
pub fn with_quote_style(mut self, quote_style: Quote) -> Self {
self.0
.set(StringLiteralFlagsInner::DOUBLE, quote_style.is_double());
self
}
@ -1996,8 +1996,9 @@ pub struct BytesLiteralFlags(BytesLiteralFlagsInner);
impl BytesLiteralFlags {
#[must_use]
pub fn with_double_quotes(mut self) -> Self {
self.0 |= BytesLiteralFlagsInner::DOUBLE;
pub fn with_quote_style(mut self, quote_style: Quote) -> Self {
self.0
.set(BytesLiteralFlagsInner::DOUBLE, quote_style.is_double());
self
}
@ -2008,23 +2009,20 @@ impl BytesLiteralFlags {
}
#[must_use]
pub fn with_prefix(self, prefix: ByteStringPrefix) -> Self {
let BytesLiteralFlags(flags) = self;
pub fn with_prefix(mut self, prefix: ByteStringPrefix) -> Self {
match prefix {
ByteStringPrefix::Regular => Self(
flags
- BytesLiteralFlagsInner::R_PREFIX_LOWER
- BytesLiteralFlagsInner::R_PREFIX_UPPER,
),
ByteStringPrefix::Raw { uppercase_r: true } => Self(
(flags | BytesLiteralFlagsInner::R_PREFIX_UPPER)
- BytesLiteralFlagsInner::R_PREFIX_LOWER,
),
ByteStringPrefix::Raw { uppercase_r: false } => Self(
(flags | BytesLiteralFlagsInner::R_PREFIX_LOWER)
- BytesLiteralFlagsInner::R_PREFIX_UPPER,
),
}
ByteStringPrefix::Regular => {
self.0 -= BytesLiteralFlagsInner::R_PREFIX_LOWER;
self.0 -= BytesLiteralFlagsInner::R_PREFIX_UPPER;
}
ByteStringPrefix::Raw { uppercase_r } => {
self.0
.set(BytesLiteralFlagsInner::R_PREFIX_UPPER, uppercase_r);
self.0
.set(BytesLiteralFlagsInner::R_PREFIX_LOWER, !uppercase_r);
}
};
self
}
pub const fn prefix(self) -> ByteStringPrefix {
@ -2108,6 +2106,439 @@ impl From<BytesLiteral> for Expr {
}
}
bitflags! {
/// Flags that can be queried to obtain information
/// regarding the prefixes and quotes used for a string literal.
///
/// Note that not all of these flags can be validly combined -- e.g.,
/// it is invalid to combine the `U_PREFIX` flag with any other
/// of the `*_PREFIX` flags. As such, the recommended way to set the
/// prefix flags is by calling the `as_flags()` method on the
/// `AnyStringPrefix` enum.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct AnyStringFlags: u8 {
/// The string uses double quotes (`"`).
/// If this flag is not set, the string uses single quotes (`'`).
const DOUBLE = 1 << 0;
/// The string is triple-quoted:
/// it begins and ends with three consecutive quote characters.
const TRIPLE_QUOTED = 1 << 1;
/// The string has a `u` or `U` prefix.
/// While this prefix is a no-op at runtime,
/// strings with this prefix can have no other prefixes set.
const U_PREFIX = 1 << 2;
/// The string has a `b` or `B` prefix.
/// This means that the string is a sequence of `int`s at runtime,
/// rather than a sequence of `str`s.
/// Strings with this flag can also be raw strings,
/// but can have no other prefixes.
const B_PREFIX = 1 << 3;
/// The string has a `f` or `F` prefix, meaning it is an f-string.
/// F-strings can also be raw strings,
/// but can have no other prefixes.
const F_PREFIX = 1 << 4;
/// The string has an `r` prefix, meaning it is a raw string.
/// F-strings and byte-strings can be raw,
/// as can strings with no other prefixes.
/// U-strings cannot be raw.
const R_PREFIX_LOWER = 1 << 5;
/// The string has an `R` prefix, meaning it is a raw string.
/// The casing of the `r`/`R` has no semantic significance at runtime;
/// see <https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings>
/// for why we track the casing of the `r` prefix,
/// but not for any other prefix.
const R_PREFIX_UPPER = 1 << 6;
}
}
/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `AnyStringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// See also: [String and Bytes literals], [PEP 701].
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum AnyStringPrefix {
/// Prefixes that indicate the string is a bytestring
Bytes(ByteStringPrefix),
/// Prefixes that indicate the string is an f-string
Format(FStringPrefix),
/// All other prefixes
Regular(StringLiteralPrefix),
}
impl TryFrom<char> for AnyStringPrefix {
    type Error = String;

    /// Interprets a single-character string prefix.
    ///
    /// The casing of `r`/`R` is preserved in the returned prefix; the casing
    /// of `u`, `b` and `f` is not. Any other character yields an `Err`
    /// carrying a human-readable message.
    fn try_from(value: char) -> Result<Self, String> {
        match value {
            'r' | 'R' => Ok(Self::Regular(StringLiteralPrefix::Raw {
                uppercase: value == 'R',
            })),
            'u' | 'U' => Ok(Self::Regular(StringLiteralPrefix::Unicode)),
            'b' | 'B' => Ok(Self::Bytes(ByteStringPrefix::Regular)),
            'f' | 'F' => Ok(Self::Format(FStringPrefix::Regular)),
            _ => Err(format!("Unexpected prefix '{value}'")),
        }
    }
}
impl TryFrom<[char; 2]> for AnyStringPrefix {
    type Error = String;

    /// Interprets a two-character string prefix.
    ///
    /// Exactly one of the two characters must be `r`/`R`, and the other must
    /// be one of `f`, `F`, `b` or `B`; the two may appear in either order.
    /// Every other pair yields an `Err` carrying a human-readable message.
    fn try_from(value: [char; 2]) -> Result<Self, String> {
        let unexpected = || format!("Unexpected prefix '{}{}'", value[0], value[1]);

        // Separate the raw marker from the character that decides the string kind.
        let [first, second] = value;
        let (raw_marker, kind_marker) = match (first, second) {
            ('r' | 'R', other) => (first, other),
            (other, 'r' | 'R') => (second, other),
            _ => return Err(unexpected()),
        };
        let uppercase_r = raw_marker == 'R';

        match kind_marker {
            'f' | 'F' => Ok(Self::Format(FStringPrefix::Raw { uppercase_r })),
            'b' | 'B' => Ok(Self::Bytes(ByteStringPrefix::Raw { uppercase_r })),
            _ => Err(unexpected()),
        }
    }
}
impl AnyStringPrefix {
const fn as_flags(self) -> AnyStringFlags {
match self {
// regular strings
Self::Regular(StringLiteralPrefix::Empty) => AnyStringFlags::empty(),
Self::Regular(StringLiteralPrefix::Unicode) => AnyStringFlags::U_PREFIX,
Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => {
AnyStringFlags::R_PREFIX_LOWER
}
Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => {
AnyStringFlags::R_PREFIX_UPPER
}
// bytestrings
Self::Bytes(ByteStringPrefix::Regular) => AnyStringFlags::B_PREFIX,
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => {
AnyStringFlags::B_PREFIX.union(AnyStringFlags::R_PREFIX_LOWER)
}
Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => {
AnyStringFlags::B_PREFIX.union(AnyStringFlags::R_PREFIX_UPPER)
}
// f-strings
Self::Format(FStringPrefix::Regular) => AnyStringFlags::F_PREFIX,
Self::Format(FStringPrefix::Raw { uppercase_r: false }) => {
AnyStringFlags::F_PREFIX.union(AnyStringFlags::R_PREFIX_LOWER)
}
Self::Format(FStringPrefix::Raw { uppercase_r: true }) => {
AnyStringFlags::F_PREFIX.union(AnyStringFlags::R_PREFIX_UPPER)
}
}
}
const fn from_kind(kind: AnyStringKind) -> Self {
let AnyStringKind(flags) = kind;
// f-strings
if flags.contains(AnyStringFlags::F_PREFIX) {
if flags.contains(AnyStringFlags::R_PREFIX_LOWER) {
return Self::Format(FStringPrefix::Raw { uppercase_r: false });
}
if flags.contains(AnyStringFlags::R_PREFIX_UPPER) {
return Self::Format(FStringPrefix::Raw { uppercase_r: true });
}
return Self::Format(FStringPrefix::Regular);
}
// bytestrings
if flags.contains(AnyStringFlags::B_PREFIX) {
if flags.contains(AnyStringFlags::R_PREFIX_LOWER) {
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false });
}
if flags.contains(AnyStringFlags::R_PREFIX_UPPER) {
return Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true });
}
return Self::Bytes(ByteStringPrefix::Regular);
}
// all other strings
if flags.contains(AnyStringFlags::R_PREFIX_LOWER) {
return Self::Regular(StringLiteralPrefix::Raw { uppercase: false });
}
if flags.contains(AnyStringFlags::R_PREFIX_UPPER) {
return Self::Regular(StringLiteralPrefix::Raw { uppercase: true });
}
if flags.contains(AnyStringFlags::U_PREFIX) {
return Self::Regular(StringLiteralPrefix::Unicode);
}
Self::Regular(StringLiteralPrefix::Empty)
}
pub const fn as_str(self) -> &'static str {
match self {
Self::Regular(regular_prefix) => regular_prefix.as_str(),
Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(),
Self::Format(fstring_prefix) => fstring_prefix.as_str(),
}
}
}
impl fmt::Display for AnyStringPrefix {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}
impl Default for AnyStringPrefix {
fn default() -> Self {
Self::Regular(StringLiteralPrefix::Empty)
}
}
/// Describes how a Python string literal is spelled in source:
/// its prefix, its quote character, and whether it is triple-quoted.
///
/// This is a thin wrapper around the private `AnyStringFlags` bitflags.
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AnyStringKind(AnyStringFlags);

impl AnyStringKind {
    /// Returns a copy of `self` whose prefix is exactly `prefix`.
    ///
    /// Any `*_PREFIX` flags that were already set are cleared first, so the
    /// result never holds a combination of prefixes that `AnyStringPrefix`
    /// cannot represent. The quote-related flags are left untouched.
    #[must_use]
    pub fn with_prefix(mut self, prefix: AnyStringPrefix) -> Self {
        const ALL_PREFIX_FLAGS: AnyStringFlags = AnyStringFlags::U_PREFIX
            .union(AnyStringFlags::B_PREFIX)
            .union(AnyStringFlags::F_PREFIX)
            .union(AnyStringFlags::R_PREFIX_LOWER)
            .union(AnyStringFlags::R_PREFIX_UPPER);

        // Replace rather than accumulate: drop the old prefix before
        // installing the new one.
        self.0 -= ALL_PREFIX_FLAGS;
        self.0 |= prefix.as_flags();
        self
    }

    /// The prefix encoded in this kind's flags.
    pub const fn prefix(self) -> AnyStringPrefix {
        AnyStringPrefix::from_kind(self)
    }

    /// Builds a kind from a prefix, a quote style, and a flag saying
    /// whether the string is triple-quoted.
    pub fn new(prefix: AnyStringPrefix, quotes: Quote, triple_quoted: bool) -> Self {
        let new = Self::default().with_prefix(prefix).with_quote_style(quotes);
        if triple_quoted {
            new.with_triple_quotes()
        } else {
            new
        }
    }

    /// Does the string have a `u` or `U` prefix?
    pub const fn is_u_string(self) -> bool {
        self.0.contains(AnyStringFlags::U_PREFIX)
    }

    /// Does the string have an `r` or `R` prefix?
    pub const fn is_raw_string(self) -> bool {
        self.0
            .intersects(AnyStringFlags::R_PREFIX_LOWER.union(AnyStringFlags::R_PREFIX_UPPER))
    }

    /// Does the string have an `f` or `F` prefix?
    pub const fn is_f_string(self) -> bool {
        self.0.contains(AnyStringFlags::F_PREFIX)
    }

    /// Does the string have a `b` or `B` prefix?
    pub const fn is_byte_string(self) -> bool {
        self.0.contains(AnyStringFlags::B_PREFIX)
    }

    /// Does the string use single or double quotes in its opener and closer?
    pub const fn quote_style(self) -> Quote {
        if self.0.contains(AnyStringFlags::DOUBLE) {
            Quote::Double
        } else {
            Quote::Single
        }
    }

    /// Is the string triple-quoted, i.e.,
    /// does it begin and end with three consecutive quote characters?
    pub const fn is_triple_quoted(self) -> bool {
        self.0.contains(AnyStringFlags::TRIPLE_QUOTED)
    }

    /// A `str` representation of the quotes used to start and close.
    /// This does not include any prefixes the string has in its opener.
    pub const fn quote_str(self) -> &'static str {
        if self.is_triple_quoted() {
            match self.quote_style() {
                Quote::Single => "'''",
                Quote::Double => r#"""""#,
            }
        } else {
            match self.quote_style() {
                Quote::Single => "'",
                Quote::Double => "\"",
            }
        }
    }

    /// The length of the prefixes used (if any) in the string's opener.
    pub fn prefix_len(self) -> TextSize {
        self.prefix().as_str().text_len()
    }

    /// The length of the quotes used to start and close the string.
    /// This does not include the length of any prefixes the string has
    /// in its opener.
    pub const fn quote_len(self) -> TextSize {
        if self.is_triple_quoted() {
            TextSize::new(3)
        } else {
            TextSize::new(1)
        }
    }

    /// The total length of the string's opener,
    /// i.e., the length of the prefixes plus the length
    /// of the quotes used to open the string.
    pub fn opener_len(self) -> TextSize {
        self.prefix_len() + self.quote_len()
    }

    /// The total length of the string's closer.
    /// This is always equal to `self.quote_len()`,
    /// but is provided here for symmetry with the `opener_len()` method.
    pub const fn closer_len(self) -> TextSize {
        self.quote_len()
    }

    /// Wraps `contents` in this kind's prefix and quotes, producing
    /// `<prefix><quotes><contents><quotes>`. `contents` is inserted
    /// verbatim; no escaping is performed here.
    pub fn format_string_contents(self, contents: &str) -> String {
        format!(
            "{}{}{}{}",
            self.prefix(),
            self.quote_str(),
            contents,
            self.quote_str()
        )
    }

    /// Returns a copy of `self` that uses the given quote style.
    #[must_use]
    pub fn with_quote_style(mut self, quotes: Quote) -> Self {
        self.0.set(AnyStringFlags::DOUBLE, quotes.is_double());
        self
    }

    /// Returns a copy of `self` that is marked as triple-quoted.
    #[must_use]
    pub fn with_triple_quotes(mut self) -> Self {
        self.0 |= AnyStringFlags::TRIPLE_QUOTED;
        self
    }
}
impl fmt::Debug for AnyStringKind {
    /// Prints the decoded prefix and quote information instead of the raw
    /// flag bits.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let prefix = self.prefix();
        let triple_quoted = self.is_triple_quoted();
        let quote_style = self.quote_style();

        // NOTE(review): the struct is printed as `StringKind`, not
        // `AnyStringKind` -- presumably to keep existing debug output
        // stable; confirm before renaming.
        let mut repr = f.debug_struct("StringKind");
        repr.field("prefix", &prefix);
        repr.field("triple_quoted", &triple_quoted);
        repr.field("quote_style", &quote_style);
        repr.finish()
    }
}
impl From<AnyStringKind> for StringLiteralFlags {
    /// Converts a kind with a regular-string prefix into string-literal flags.
    ///
    /// # Panics
    ///
    /// Panics if `value` has a bytestring or f-string prefix.
    fn from(value: AnyStringKind) -> StringLiteralFlags {
        let prefix = match value.prefix() {
            AnyStringPrefix::Regular(prefix) => prefix,
            other => unreachable!(
                "Should never attempt to convert {} into a regular string",
                other
            ),
        };

        let flags = StringLiteralFlags::default()
            .with_quote_style(value.quote_style())
            .with_prefix(prefix);

        if !value.is_triple_quoted() {
            return flags;
        }
        flags.with_triple_quotes()
    }
}
impl From<StringLiteralFlags> for AnyStringKind {
fn from(value: StringLiteralFlags) -> Self {
Self::new(
AnyStringPrefix::Regular(value.prefix()),
value.quote_style(),
value.is_triple_quoted(),
)
}
}
impl From<AnyStringKind> for BytesLiteralFlags {
    /// Converts a kind with a bytestring prefix into bytes-literal flags.
    ///
    /// # Panics
    ///
    /// Panics if `value` does not have a bytestring prefix.
    fn from(value: AnyStringKind) -> BytesLiteralFlags {
        let bytestring_prefix = match value.prefix() {
            AnyStringPrefix::Bytes(prefix) => prefix,
            other => unreachable!(
                "Should never attempt to convert {} into a bytestring",
                other
            ),
        };

        let flags = BytesLiteralFlags::default()
            .with_quote_style(value.quote_style())
            .with_prefix(bytestring_prefix);

        if !value.is_triple_quoted() {
            return flags;
        }
        flags.with_triple_quotes()
    }
}
impl From<BytesLiteralFlags> for AnyStringKind {
fn from(value: BytesLiteralFlags) -> Self {
Self::new(
AnyStringPrefix::Bytes(value.prefix()),
value.quote_style(),
value.is_triple_quoted(),
)
}
}
impl From<AnyStringKind> for FStringFlags {
    /// Converts a kind with an f-string prefix into f-string flags.
    ///
    /// # Panics
    ///
    /// Panics if `value` does not have an f-string prefix.
    fn from(value: AnyStringKind) -> FStringFlags {
        let fstring_prefix = match value.prefix() {
            AnyStringPrefix::Format(prefix) => prefix,
            other => unreachable!(
                "Should never attempt to convert {} into an f-string",
                other
            ),
        };

        let flags = FStringFlags::default()
            .with_quote_style(value.quote_style())
            .with_prefix(fstring_prefix);

        if !value.is_triple_quoted() {
            return flags;
        }
        flags.with_triple_quotes()
    }
}
impl From<FStringFlags> for AnyStringKind {
fn from(value: FStringFlags) -> Self {
Self::new(
AnyStringPrefix::Format(value.prefix()),
value.quote_style(),
value.is_triple_quoted(),
)
}
}
#[derive(Clone, Debug, PartialEq)]
pub struct ExprNumberLiteral {
pub range: TextRange,

View file

@ -25,7 +25,6 @@ ruff_python_parser = { path = "../ruff_python_parser" }
ruff_text_size = { path = "../ruff_text_size" }
anyhow = { workspace = true }
bitflags = { workspace = true }
clap = { workspace = true }
countme = { workspace = true }
itertools = { workspace = true }

View file

@ -1,8 +1,7 @@
use ruff_python_ast::BytesLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{StringNormalizer, StringPart};
use crate::string::StringNormalizer;
#[derive(Default)]
pub struct FormatBytesLiteral;
@ -13,7 +12,7 @@ impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
StringNormalizer::from_context(f.context())
.with_preferred_quote_style(f.options().quote_style())
.normalize(&StringPart::from_source(item.range(), &locator), &locator)
.normalize(item.into(), &locator)
.fmt(f)
}
}

View file

@ -1,11 +1,10 @@
use ruff_formatter::write;
use ruff_python_ast::FString;
use ruff_python_ast::{AnyStringKind, FString};
use ruff_source_file::Locator;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::preview::is_f_string_formatting_enabled;
use crate::string::{Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes};
use crate::string::{Quoting, StringNormalizer, StringQuotes};
use super::f_string_element::FormatFStringElement;
@ -30,8 +29,6 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
let string = StringPart::from_source(self.value.range(), &locator);
let normalizer = StringNormalizer::from_context(f.context())
.with_quoting(self.quoting)
.with_preferred_quote_style(f.options().quote_style());
@ -39,7 +36,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
// If f-string formatting is disabled (not in preview), then we will
// fall back to the previous behavior of normalizing the f-string.
if !is_f_string_formatting_enabled(f.context()) {
let result = normalizer.normalize(&string, &locator).fmt(f);
let result = normalizer.normalize(self.value.into(), &locator).fmt(f);
let comments = f.context().comments();
self.value.elements.iter().for_each(|value| {
comments.mark_verbatim_node_comments_formatted(value.into());
@ -59,16 +56,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
return result;
}
let quote_selection = normalizer.choose_quotes(&string, &locator);
let string_kind = normalizer.choose_quotes(self.value.into(), &locator).kind();
let context = FStringContext::new(
string.prefix(),
quote_selection.quotes(),
string_kind,
FStringLayout::from_f_string(self.value, &locator),
);
// Starting prefix and quote
write!(f, [string.prefix(), quote_selection.quotes()])?;
let quotes = StringQuotes::from(string_kind);
write!(f, [string_kind.prefix(), quotes])?;
f.join()
.entries(
@ -80,32 +77,23 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
.finish()?;
// Ending quote
quote_selection.quotes().fmt(f)
quotes.fmt(f)
}
}
#[derive(Clone, Copy, Debug)]
pub(crate) struct FStringContext {
prefix: StringPrefix,
quotes: StringQuotes,
kind: AnyStringKind,
layout: FStringLayout,
}
impl FStringContext {
const fn new(prefix: StringPrefix, quotes: StringQuotes, layout: FStringLayout) -> Self {
Self {
prefix,
quotes,
layout,
}
const fn new(kind: AnyStringKind, layout: FStringLayout) -> Self {
Self { kind, layout }
}
pub(crate) const fn quotes(self) -> StringQuotes {
self.quotes
}
pub(crate) const fn prefix(self) -> StringPrefix {
self.prefix
pub(crate) fn kind(self) -> AnyStringKind {
self.kind
}
pub(crate) const fn layout(self) -> FStringLayout {

View file

@ -56,13 +56,7 @@ impl<'a> FormatFStringLiteralElement<'a> {
impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let literal_content = f.context().locator().slice(self.element.range());
let normalized = normalize_string(
literal_content,
0,
self.context.quotes(),
self.context.prefix(),
true,
);
let normalized = normalize_string(literal_content, 0, self.context.kind(), true);
match &normalized {
Cow::Borrowed(_) => source_text_slice(self.element.range()).fmt(f),
Cow::Owned(normalized) => text(normalized).fmt(f),

View file

@ -1,8 +1,7 @@
use ruff_python_ast::StringLiteral;
use ruff_text_size::Ranged;
use crate::prelude::*;
use crate::string::{docstring, Quoting, StringNormalizer, StringPart};
use crate::string::{docstring, Quoting, StringNormalizer};
use crate::QuoteStyle;
pub(crate) struct FormatStringLiteral<'a> {
@ -61,10 +60,7 @@ impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
let normalized = StringNormalizer::from_context(f.context())
.with_quoting(self.layout.quoting())
.with_preferred_quote_style(quote_style)
.normalize(
&StringPart::from_source(self.value.range(), &locator),
&locator,
);
.normalize(self.value.into(), &locator);
if self.layout.is_docstring() {
docstring::format(&normalized, f)

View file

@ -3,17 +3,17 @@ use std::iter::FusedIterator;
use memchr::memchr2;
use ruff_python_ast::{
self as ast, AnyNodeRef, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
StringLiteral,
self as ast, AnyNodeRef, AnyStringKind, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral,
ExpressionRef, StringLiteral,
};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange};
use ruff_text_size::{Ranged, TextRange};
use crate::expression::expr_f_string::f_string_quoting;
use crate::other::f_string::FormatFString;
use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
use crate::prelude::*;
use crate::string::{Quoting, StringPrefix, StringQuotes};
use crate::string::Quoting;
/// Represents any kind of string expression. This could be either a string,
/// bytes or f-string.
@ -70,14 +70,10 @@ impl<'a> AnyString<'a> {
pub(crate) fn is_multiline(self, source: &str) -> bool {
match self {
AnyString::String(_) | AnyString::Bytes(_) => {
let contents = &source[self.range()];
let prefix = StringPrefix::parse(contents);
let quotes = StringQuotes::parse(
&contents[TextRange::new(prefix.text_len(), contents.text_len())],
);
quotes.is_some_and(StringQuotes::is_triple)
&& memchr2(b'\n', b'\r', contents.as_bytes()).is_some()
self.parts(Quoting::default())
.next()
.is_some_and(|part| part.kind().is_triple_quoted())
&& memchr2(b'\n', b'\r', source[self.range()].as_bytes()).is_some()
}
AnyString::FString(fstring) => {
memchr2(b'\n', b'\r', source[fstring.range].as_bytes()).is_some()
@ -179,6 +175,16 @@ pub(super) enum AnyStringPart<'a> {
},
}
impl AnyStringPart<'_> {
fn kind(&self) -> AnyStringKind {
match self {
Self::String { part, .. } => part.flags.into(),
Self::Bytes(bytes_literal) => bytes_literal.flags.into(),
Self::FString { part, .. } => part.flags.into(),
}
}
}
impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
fn from(value: &AnyStringPart<'a>) -> Self {
match value {

View file

@ -18,6 +18,7 @@ use {
ruff_text_size::{Ranged, TextLen, TextRange, TextSize},
};
use crate::string::StringQuotes;
use crate::{prelude::*, DocstringCodeLineWidth, FormatModuleError};
use super::NormalizedString;
@ -126,7 +127,9 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
let mut lines = docstring.split('\n').peekable();
// Start the string
write!(f, [normalized.prefix(), normalized.quotes()])?;
let kind = normalized.kind();
let quotes = StringQuotes::from(kind);
write!(f, [kind.prefix(), quotes])?;
// We track where in the source docstring we are (in source code byte offsets)
let mut offset = normalized.start();
@ -142,7 +145,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
// Edge case: The first line is `""" "content`, so we need to insert a chaperone space that keeps
// inner quotes and closing quotes from getting too close, to avoid `""""content`
if trim_both.starts_with(normalized.quotes().quote_char.as_char()) {
if trim_both.starts_with(quotes.quote_char.as_char()) {
space().fmt(f)?;
}
@ -169,7 +172,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
{
space().fmt(f)?;
}
normalized.quotes().fmt(f)?;
quotes.fmt(f)?;
return Ok(());
}
@ -195,7 +198,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
offset,
stripped_indentation,
already_normalized,
quote_char: normalized.quotes().quote_char,
quote_char: quotes.quote_char,
code_example: CodeExample::default(),
}
.add_iter(lines)?;
@ -208,7 +211,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form
space().fmt(f)?;
}
write!(f, [normalized.quotes()])
write!(f, [quotes])
}
fn contains_unescaped_newline(haystack: &str) -> bool {
@ -1570,7 +1573,7 @@ fn docstring_format_source(
/// that avoids `content""""` and `content\"""`. This only applies to unescaped backslashes,
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
trim_end.ends_with(normalized.quotes().quote_char.as_char())
trim_end.ends_with(normalized.kind().quote_style().as_char())
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
}

View file

@ -1,11 +1,9 @@
use bitflags::bitflags;
pub(crate) use any::AnyString;
pub(crate) use normalize::{normalize_string, NormalizedString, StringNormalizer};
use ruff_formatter::format_args;
use ruff_python_ast::str::Quote;
use ruff_source_file::Locator;
use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, AnyStringKind, AnyStringPrefix};
use ruff_text_size::{Ranged, TextRange};
use crate::comments::{leading_comments, trailing_comments};
use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space;
@ -55,132 +53,17 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
}
}
#[derive(Debug)]
pub(crate) struct StringPart {
/// The prefix.
prefix: StringPrefix,
/// The actual quotes of the string in the source
quotes: StringQuotes,
/// The range of the string's content (full range minus quotes and prefix)
content_range: TextRange,
}
impl StringPart {
pub(crate) fn from_source(range: TextRange, locator: &Locator) -> Self {
let string_content = locator.slice(range);
let prefix = StringPrefix::parse(string_content);
let after_prefix = &string_content[usize::from(prefix.text_len())..];
let quotes =
StringQuotes::parse(after_prefix).expect("Didn't find string quotes after prefix");
let relative_raw_content_range = TextRange::new(
prefix.text_len() + quotes.text_len(),
string_content.text_len() - quotes.text_len(),
);
let raw_content_range = relative_raw_content_range + range.start();
Self {
prefix,
content_range: raw_content_range,
quotes,
}
}
/// Returns the prefix of the string part.
pub(crate) const fn prefix(&self) -> StringPrefix {
self.prefix
}
/// Returns the surrounding quotes of the string part.
pub(crate) const fn quotes(&self) -> StringQuotes {
self.quotes
}
/// Returns the range of the string's content in the source (minus prefix and quotes).
pub(crate) const fn content_range(&self) -> TextRange {
self.content_range
}
}
bitflags! {
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) struct StringPrefix: u8 {
const UNICODE = 0b0000_0001;
/// `r"test"`
const RAW = 0b0000_0010;
/// `R"test"
const RAW_UPPER = 0b0000_0100;
const BYTE = 0b0000_1000;
const F_STRING = 0b0001_0000;
}
}
impl StringPrefix {
pub(crate) fn parse(input: &str) -> StringPrefix {
let chars = input.chars();
let mut prefix = StringPrefix::empty();
for c in chars {
let flag = match c {
'u' | 'U' => StringPrefix::UNICODE,
'f' | 'F' => StringPrefix::F_STRING,
'b' | 'B' => StringPrefix::BYTE,
'r' => StringPrefix::RAW,
'R' => StringPrefix::RAW_UPPER,
'\'' | '"' => break,
c => {
unreachable!(
"Unexpected character '{c}' terminating the prefix of a string literal"
);
}
};
prefix |= flag;
}
prefix
}
pub(crate) const fn text_len(self) -> TextSize {
TextSize::new(self.bits().count_ones())
}
pub(super) const fn is_raw_string(self) -> bool {
self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER)
}
pub(super) const fn is_fstring(self) -> bool {
self.contains(StringPrefix::F_STRING)
}
pub(super) const fn is_byte(self) -> bool {
self.contains(StringPrefix::BYTE)
}
}
impl Format<PyFormatContext<'_>> for StringPrefix {
impl Format<PyFormatContext<'_>> for AnyStringPrefix {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
// Retain the casing for the raw prefix:
// https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
if self.contains(StringPrefix::RAW) {
token("r").fmt(f)?;
} else if self.contains(StringPrefix::RAW_UPPER) {
token("R").fmt(f)?;
}
if self.contains(StringPrefix::BYTE) {
token("b").fmt(f)?;
}
if self.contains(StringPrefix::F_STRING) {
token("f").fmt(f)?;
}
// Remove the unicode prefix `u` if any because it is meaningless in Python 3+.
if !matches!(
self,
AnyStringPrefix::Regular(
ast::StringLiteralPrefix::Empty | ast::StringLiteralPrefix::Unicode
)
) {
token(self.as_str()).fmt(f)?;
}
Ok(())
}
}
@ -191,34 +74,6 @@ pub(crate) struct StringQuotes {
quote_char: Quote,
}
impl StringQuotes {
    /// Reads the opening quotes from the start of `input`.
    ///
    /// Returns `None` when `input` is empty or its first character cannot be converted
    /// into a [`Quote`]. The quotes are considered triple when the two characters that
    /// follow are the same quote character again.
    pub(crate) fn parse(input: &str) -> Option<StringQuotes> {
        let mut rest = input.chars();
        let opening = rest.next()?;
        let quote_char = Quote::try_from(opening).ok()?;

        let triple = matches!(
            (rest.next(), rest.next()),
            (Some(second), Some(third)) if second == opening && third == opening
        );

        Some(Self { triple, quote_char })
    }

    /// Returns `true` if the string is delimited by three quote characters.
    pub(crate) const fn is_triple(self) -> bool {
        self.triple
    }

    /// Returns the length of the quotes on one side of the string: 3 for triple quotes,
    /// 1 otherwise.
    const fn text_len(self) -> TextSize {
        TextSize::new(if self.triple { 3 } else { 1 })
    }
}
impl Format<PyFormatContext<'_>> for StringQuotes {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let quotes = match (self.quote_char, self.triple) {
@ -232,6 +87,15 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
}
}
impl From<AnyStringKind> for StringQuotes {
    /// Extracts the quote information (triple-quoted or not, quote character) from a
    /// string kind.
    fn from(value: AnyStringKind) -> Self {
        let triple = value.is_triple_quoted();
        let quote_char = value.quote_style();
        Self { triple, quote_char }
    }
}
impl TryFrom<QuoteStyle> for Quote {
type Error = ();
@ -252,3 +116,58 @@ impl From<Quote> for QuoteStyle {
}
}
}
/// A single string, bytes or f-string literal, described by its kind and its source range.
///
/// Values are created from `ast::StringLiteral`, `ast::BytesLiteral` and `ast::FString`
/// nodes through the `From` implementations in this module.
#[derive(Debug, Clone, Copy)]
pub(crate) struct StringPart {
/// Prefix and quote information, converted from the AST node's `flags`.
kind: AnyStringKind,
/// The source range copied from the AST node; `content_range` narrows it by the
/// opener and closer lengths.
range: TextRange,
}
impl Ranged for StringPart {
/// Returns the stored `range` of this part.
fn range(&self) -> TextRange {
self.range
}
}
impl StringPart {
    /// Returns the [`AnyStringKind`] of this part, which holds the information about the
    /// string's prefix and quotes.
    fn kind(self) -> AnyStringKind {
        self.kind
    }

    /// Returns the range of the string's content in the source (minus prefix and quotes).
    ///
    /// The start is moved forward by the length of the opener and the end is moved back
    /// by the length of the closer.
    fn content_range(self) -> TextRange {
        let kind = self.kind();
        let content_start = self.start() + kind.opener_len();
        let content_end = self.end() - kind.closer_len();
        TextRange::new(content_start, content_end)
    }
}
impl From<&ast::StringLiteral> for StringPart {
    /// Builds a part from a string literal node, taking over its range and converting
    /// its flags into the part's kind.
    fn from(value: &ast::StringLiteral) -> Self {
        let range = value.range;
        let kind = value.flags.into();
        Self { range, kind }
    }
}
impl From<&ast::BytesLiteral> for StringPart {
    /// Builds a part from a bytes literal node, taking over its range and converting
    /// its flags into the part's kind.
    fn from(value: &ast::BytesLiteral) -> Self {
        let range = value.range;
        let kind = value.flags.into();
        Self { range, kind }
    }
}
impl From<&ast::FString> for StringPart {
    /// Builds a part from an f-string node, taking over its range and converting its
    /// flags into the part's kind.
    fn from(value: &ast::FString) -> Self {
        let range = value.range;
        let kind = value.flags.into();
        Self { range, kind }
    }
}

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::iter::FusedIterator;
use ruff_formatter::FormatContext;
use ruff_python_ast::str::Quote;
use ruff_python_ast::{str::Quote, AnyStringKind};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
@ -10,7 +10,7 @@ use crate::context::FStringState;
use crate::options::PythonVersion;
use crate::prelude::*;
use crate::preview::is_f_string_formatting_enabled;
use crate::string::{Quoting, StringPart, StringPrefix, StringQuotes};
use crate::string::{Quoting, StringPart, StringQuotes};
use crate::QuoteStyle;
pub(crate) struct StringNormalizer {
@ -44,7 +44,7 @@ impl StringNormalizer {
self
}
fn quoting(&self, string: &StringPart) -> Quoting {
fn quoting(&self, string: StringPart) -> Quoting {
if let FStringState::InsideExpressionElement(context) = self.f_string_state {
// If we're inside an f-string, we need to make sure to preserve the
// existing quotes unless we're inside a triple-quoted f-string and
@ -60,7 +60,7 @@ impl StringNormalizer {
// The reason to preserve the quotes is based on the assumption that
// the original f-string is valid in terms of quoting, and we don't
// want to change that to make it invalid.
if (context.quotes().is_triple() && !string.quotes().is_triple())
if (context.kind().is_triple_quoted() && !string.kind().is_triple_quoted())
|| self.target_version.supports_pep_701()
{
self.quoting
@ -73,18 +73,19 @@ impl StringNormalizer {
}
/// Computes the string's preferred quotes.
pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
pub(crate) fn choose_quotes(&self, string: StringPart, locator: &Locator) -> QuoteSelection {
let raw_content = locator.slice(string.content_range());
let first_quote_or_normalized_char_offset = raw_content
.bytes()
.position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
let string_kind = string.kind();
let quotes = match self.quoting(string) {
Quoting::Preserve => string.quotes(),
let new_kind = match self.quoting(string) {
Quoting::Preserve => string_kind,
Quoting::CanChange => {
// Per PEP 8, always prefer double quotes for triple-quoted strings.
// Except when using quote-style-preserve.
let preferred_style = if string.quotes().triple {
let preferred_style = if string_kind.is_triple_quoted() {
// ... unless we're formatting a code snippet inside a docstring,
// then we specifically want to invert our quote style to avoid
// writing out invalid Python.
@ -145,33 +146,30 @@ impl StringNormalizer {
if let Some(first_quote_or_normalized_char_offset) =
first_quote_or_normalized_char_offset
{
if string.prefix().is_raw_string() {
if string_kind.is_raw_string() {
choose_quotes_for_raw_string(
&raw_content[first_quote_or_normalized_char_offset..],
string.quotes(),
string_kind,
preferred_quote,
)
} else {
choose_quotes_impl(
&raw_content[first_quote_or_normalized_char_offset..],
string.quotes(),
string_kind,
preferred_quote,
)
}
} else {
StringQuotes {
quote_char: preferred_quote,
triple: string.quotes().is_triple(),
}
string_kind.with_quote_style(preferred_quote)
}
} else {
string.quotes()
string_kind
}
}
};
QuoteSelection {
quotes,
kind: new_kind,
first_quote_or_normalized_char_offset,
}
}
@ -179,11 +177,10 @@ impl StringNormalizer {
/// Computes the string's preferred quotes and normalizes its content.
pub(crate) fn normalize<'a>(
&self,
string: &StringPart,
string: StringPart,
locator: &'a Locator,
) -> NormalizedString<'a> {
let raw_content = locator.slice(string.content_range());
let quote_selection = self.choose_quotes(string, locator);
let normalized = if let Some(first_quote_or_escape_offset) =
@ -192,8 +189,7 @@ impl StringNormalizer {
normalize_string(
raw_content,
first_quote_or_escape_offset,
quote_selection.quotes,
string.prefix(),
quote_selection.kind,
// TODO: Remove the `b'{'` in `choose_quotes` when promoting the
// `format_fstring` preview style
self.format_fstring,
@ -203,34 +199,31 @@ impl StringNormalizer {
};
NormalizedString {
prefix: string.prefix(),
kind: quote_selection.kind,
content_range: string.content_range(),
text: normalized,
quotes: quote_selection.quotes,
}
}
}
#[derive(Debug)]
pub(crate) struct QuoteSelection {
quotes: StringQuotes,
kind: AnyStringKind,
/// Offset to the first quote character or character that needs special handling in [`normalize_string`].
first_quote_or_normalized_char_offset: Option<usize>,
}
impl QuoteSelection {
pub(crate) fn quotes(&self) -> StringQuotes {
self.quotes
pub(crate) fn kind(&self) -> AnyStringKind {
self.kind
}
}
#[derive(Debug)]
pub(crate) struct NormalizedString<'a> {
prefix: crate::string::StringPrefix,
/// The quotes of the normalized string (preferred quotes)
quotes: StringQuotes,
/// Holds data about the quotes and prefix of the string
kind: AnyStringKind,
/// The range of the string's content in the source (minus prefix and quotes).
content_range: TextRange,
@ -244,12 +237,8 @@ impl<'a> NormalizedString<'a> {
&self.text
}
pub(crate) fn quotes(&self) -> StringQuotes {
self.quotes
}
pub(crate) fn prefix(&self) -> StringPrefix {
self.prefix
pub(crate) fn kind(&self) -> AnyStringKind {
self.kind
}
}
@ -261,7 +250,8 @@ impl Ranged for NormalizedString<'_> {
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
ruff_formatter::write!(f, [self.prefix, self.quotes])?;
let quotes = StringQuotes::from(self.kind);
ruff_formatter::write!(f, [self.kind.prefix(), quotes])?;
match &self.text {
Cow::Borrowed(_) => {
source_text_slice(self.range()).fmt(f)?;
@ -270,7 +260,7 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
text(normalized).fmt(f)?;
}
}
self.quotes.fmt(f)
quotes.fmt(f)
}
}
@ -281,9 +271,9 @@ impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
/// style is double quotes.
fn choose_quotes_for_raw_string(
input: &str,
quotes: StringQuotes,
kind: AnyStringKind,
preferred_quote: Quote,
) -> StringQuotes {
) -> AnyStringKind {
let preferred_quote_char = preferred_quote.as_char();
let mut chars = input.chars().peekable();
let contains_unescaped_configured_quotes = loop {
@ -294,7 +284,7 @@ fn choose_quotes_for_raw_string(
}
// `"` or `'`
Some(c) if c == preferred_quote_char => {
if !quotes.triple {
if !kind.is_triple_quoted() {
break true;
}
@ -319,14 +309,10 @@ fn choose_quotes_for_raw_string(
None => break false,
}
};
StringQuotes {
triple: quotes.triple,
quote_char: if contains_unescaped_configured_quotes {
quotes.quote_char
} else {
preferred_quote
},
if contains_unescaped_configured_quotes {
kind
} else {
kind.with_quote_style(preferred_quote)
}
}
@ -338,8 +324,8 @@ fn choose_quotes_for_raw_string(
/// For triple quoted strings, the preferred quote style is always used, unless the string contains
/// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
/// used unless the string contains `"""`).
fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote) -> StringQuotes {
let quote = if quotes.triple {
fn choose_quotes_impl(input: &str, kind: AnyStringKind, preferred_quote: Quote) -> AnyStringKind {
let quote = if kind.is_triple_quoted() {
// True if the string contains a triple quote sequence of the configured quote style.
let mut uses_triple_quotes = false;
let mut chars = input.chars().peekable();
@ -393,7 +379,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
if uses_triple_quotes {
// String contains a triple quote sequence of the configured quote style.
// Keep the existing quote style.
quotes.quote_char
kind.quote_style()
} else {
preferred_quote
}
@ -433,10 +419,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
}
};
StringQuotes {
triple: quotes.triple,
quote_char: quote,
}
kind.with_quote_style(quote)
}
/// Adds the necessary quote escapes and removes unnecessary escape sequences when quoting `input`
@ -446,8 +429,7 @@ fn choose_quotes_impl(input: &str, quotes: StringQuotes, preferred_quote: Quote)
pub(crate) fn normalize_string(
input: &str,
start_offset: usize,
quotes: StringQuotes,
prefix: StringPrefix,
kind: AnyStringKind,
format_fstring: bool,
) -> Cow<str> {
// The normalized string if `input` is not yet normalized.
@ -457,14 +439,14 @@ pub(crate) fn normalize_string(
// If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
let mut last_index = 0;
let quote = quotes.quote_char;
let quote = kind.quote_style();
let preferred_quote = quote.as_char();
let opposite_quote = quote.opposite().as_char();
let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
let is_raw = prefix.is_raw_string();
let is_fstring = !format_fstring && prefix.is_fstring();
let is_raw = kind.is_raw_string();
let is_fstring = !format_fstring && kind.is_f_string();
let mut formatted_value_nesting = 0u32;
while let Some((index, c)) = chars.next() {
@ -502,7 +484,7 @@ pub(crate) fn normalize_string(
} else {
// Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
let escape_start_len = '\\'.len_utf8() + next.len_utf8();
if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
if let Some(normalised) = UnicodeEscape::new(next, !kind.is_byte_string())
.and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
{
let escape_start_offset = index + escape_start_len;
@ -521,7 +503,7 @@ pub(crate) fn normalize_string(
}
}
if !quotes.triple {
if !kind.is_triple_quoted() {
#[allow(clippy::if_same_then_else)]
if next == opposite_quote && formatted_value_nesting == 0 {
// Remove the escape by ending before the backslash and starting again with the quote
@ -534,7 +516,10 @@ pub(crate) fn normalize_string(
}
}
}
} else if !quotes.triple && c == preferred_quote && formatted_value_nesting == 0 {
} else if !kind.is_triple_quoted()
&& c == preferred_quote
&& formatted_value_nesting == 0
{
// Escape the quote
output.push_str(&input[last_index..index]);
output.push('\\');
@ -704,9 +689,7 @@ impl UnicodeEscape {
mod tests {
use std::borrow::Cow;
use ruff_python_ast::str::Quote;
use crate::string::{StringPrefix, StringQuotes};
use ruff_python_ast::{str::Quote, AnyStringKind, AnyStringPrefix, ByteStringPrefix};
use super::{normalize_string, UnicodeEscape};
@ -727,11 +710,11 @@ mod tests {
let normalized = normalize_string(
input,
0,
StringQuotes {
triple: false,
quote_char: Quote::Double,
},
StringPrefix::BYTE,
AnyStringKind::new(
AnyStringPrefix::Bytes(ByteStringPrefix::Regular),
Quote::Double,
false,
),
true,
);

View file

@ -34,19 +34,15 @@ use std::{char, cmp::Ordering, str::FromStr};
use unicode_ident::{is_xid_continue, is_xid_start};
use unicode_normalization::UnicodeNormalization;
use ruff_python_ast::{FStringPrefix, Int, IpyEscapeKind};
use ruff_python_ast::{
str::Quote, AnyStringKind, AnyStringPrefix, FStringPrefix, Int, IpyEscapeKind,
};
use ruff_text_size::{TextLen, TextRange, TextSize};
use crate::lexer::cursor::{Cursor, EOF_CHAR};
use crate::lexer::fstring::{FStringContext, FStrings};
use crate::lexer::indentation::{Indentation, Indentations};
use crate::{
soft_keywords::SoftKeywordTransformer,
string::FStringErrorType,
string_token_flags::{StringKind, StringPrefix},
token::Tok,
Mode,
};
use crate::{soft_keywords::SoftKeywordTransformer, string::FStringErrorType, token::Tok, Mode};
mod cursor;
mod fstring;
@ -188,14 +184,14 @@ impl<'source> Lexer<'source> {
return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true }));
}
(_, quote @ ('\'' | '"')) => {
if let Ok(prefix) = StringPrefix::try_from(first) {
if let Ok(prefix) = AnyStringPrefix::try_from(first) {
self.cursor.bump();
return self.lex_string(prefix, quote);
}
}
(_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
self.cursor.bump();
if let Ok(prefix) = StringPrefix::try_from([first, second]) {
if let Ok(prefix) = AnyStringPrefix::try_from([first, second]) {
let quote = self.cursor.bump().unwrap();
return self.lex_string(prefix, quote);
}
@ -560,11 +556,14 @@ impl<'source> Lexer<'source> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut kind = StringKind::from_prefix(StringPrefix::Format(prefix));
let mut kind = AnyStringKind::default()
.with_prefix(AnyStringPrefix::Format(prefix))
.with_quote_style(if quote == '"' {
Quote::Double
} else {
Quote::Single
});
if quote == '"' {
kind = kind.with_double_quotes();
}
if self.cursor.eat_char2(quote, quote) {
kind = kind.with_triple_quotes();
}
@ -708,15 +707,17 @@ impl<'source> Lexer<'source> {
}
/// Lex a string literal.
fn lex_string(&mut self, prefix: StringPrefix, quote: char) -> Result<Tok, LexicalError> {
fn lex_string(&mut self, prefix: AnyStringPrefix, quote: char) -> Result<Tok, LexicalError> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut kind = StringKind::from_prefix(prefix);
if quote == '"' {
kind = kind.with_double_quotes();
}
let mut kind = AnyStringKind::default()
.with_prefix(prefix)
.with_quote_style(if quote == '"' {
Quote::Double
} else {
Quote::Single
});
// If the next two characters are also the quote character, then we have a triple-quoted
// string; consume those two characters and ensure that we require a triple-quote to close
@ -1082,7 +1083,7 @@ impl<'source> Lexer<'source> {
c if is_ascii_identifier_start(c) => self.lex_identifier(c)?,
'0'..='9' => self.lex_number(c)?,
'#' => return Ok((self.lex_comment(), self.token_range())),
'\'' | '"' => self.lex_string(StringPrefix::default(), c)?,
'\'' | '"' => self.lex_string(AnyStringPrefix::default(), c)?,
'=' => {
if self.cursor.eat_char('=') {
Tok::EqEqual

View file

@ -1,9 +1,9 @@
use crate::string_token_flags::StringKind;
use ruff_python_ast::AnyStringKind;
/// The context representing the current f-string that the lexer is in.
#[derive(Debug)]
pub(crate) struct FStringContext {
kind: StringKind,
kind: AnyStringKind,
/// The level of nesting for the lexer when it entered the current f-string.
/// The nesting level includes all kinds of parentheses i.e., round, square,
@ -17,7 +17,7 @@ pub(crate) struct FStringContext {
}
impl FStringContext {
pub(crate) const fn new(kind: StringKind, nesting: u32) -> Self {
pub(crate) const fn new(kind: AnyStringKind, nesting: u32) -> Self {
debug_assert!(kind.is_f_string());
Self {
kind,
@ -26,7 +26,7 @@ impl FStringContext {
}
}
pub(crate) const fn kind(&self) -> StringKind {
pub(crate) const fn kind(&self) -> AnyStringKind {
debug_assert!(self.kind.is_f_string());
self.kind
}

View file

@ -115,7 +115,6 @@ pub use parser::{
};
use ruff_python_ast::{Mod, PySourceType, Suite};
pub use string::FStringErrorType;
pub use string_token_flags::StringKind;
pub use token::{Tok, TokenKind};
use crate::lexer::LexResult;
@ -128,7 +127,6 @@ pub mod lexer;
mod parser;
mod soft_keywords;
mod string;
mod string_token_flags;
mod token;
mod token_source;
pub mod typing;

View file

@ -4,7 +4,7 @@
// See also: https://greentreesnakes.readthedocs.io/en/latest/nodes.html#keyword
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
use crate::{
FStringErrorType,
Mode,
@ -12,7 +12,6 @@ use crate::{
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
string_token_flags::StringKind,
token,
invalid,
};
@ -1983,7 +1982,7 @@ extern {
Dedent => token::Tok::Dedent,
StartModule => token::Tok::StartModule,
StartExpression => token::Tok::StartExpression,
fstring_start => token::Tok::FStringStart(<StringKind>),
fstring_start => token::Tok::FStringStart(<AnyStringKind>),
FStringEnd => token::Tok::FStringEnd,
"!" => token::Tok::Exclamation,
"?" => token::Tok::Question,
@ -2076,11 +2075,11 @@ extern {
complex => token::Tok::Complex { real: <f64>, imag: <f64> },
string => token::Tok::String {
value: <Box<str>>,
kind: <StringKind>,
kind: <AnyStringKind>,
},
fstring_middle => token::Tok::FStringMiddle {
value: <Box<str>>,
kind: <StringKind>,
kind: <AnyStringKind>,
},
name => token::Tok::Name { name: <Box<str>> },
ipy_escape_command => token::Tok::IpyEscapeCommand {

View file

@ -1,7 +1,7 @@
// auto-generated: "lalrpop 0.20.0"
// sha3: c98876ae871e13c1a0cabf962138ded61584185a0c3144b626dac60f707ea396
// sha3: 4ca26eae1233cf922ef88887715de0a4ca45076324249a20b87f095e9638165d
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
use crate::{
FStringErrorType,
Mode,
@ -9,7 +9,6 @@ use crate::{
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
string_token_flags::StringKind,
token,
invalid,
};
@ -26,7 +25,7 @@ extern crate alloc;
mod __parse__Top {
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind};
use ruff_python_ast::{self as ast, Int, IpyEscapeKind, AnyStringKind};
use crate::{
FStringErrorType,
Mode,
@ -34,7 +33,6 @@ mod __parse__Top {
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
string_token_flags::StringKind,
token,
invalid,
};
@ -52,8 +50,8 @@ mod __parse__Top {
Variant0(token::Tok),
Variant1((f64, f64)),
Variant2(f64),
Variant3((Box<str>, StringKind)),
Variant4(StringKind),
Variant3((Box<str>, AnyStringKind)),
Variant4(AnyStringKind),
Variant5(Int),
Variant6((IpyEscapeKind, Box<str>)),
Variant7(Box<str>),
@ -151,7 +149,7 @@ mod __parse__Top {
Variant99(ast::TypeParams),
Variant100(core::option::Option<ast::TypeParams>),
Variant101(ast::UnaryOp),
Variant102(core::option::Option<(Box<str>, StringKind)>),
Variant102(core::option::Option<(Box<str>, AnyStringKind)>),
}
const __ACTION: &[i16] = &[
// State 0
@ -18322,7 +18320,7 @@ mod __parse__Top {
fn __pop_Variant3<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, (Box<str>, StringKind), TextSize)
) -> (TextSize, (Box<str>, AnyStringKind), TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant3(__v), __r)) => (__l, __v, __r),
@ -18479,6 +18477,16 @@ mod __parse__Top {
_ => __symbol_type_mismatch()
}
}
// Pops the top entry of the parser's symbol stack and unwraps it as `Variant4`
// (an `AnyStringKind`), returning the payload together with the two `TextSize`
// values stored alongside it (presumably the symbol's start and end offsets —
// confirm against the lalrpop runtime).
// An empty stack or any other variant ends up in `__symbol_type_mismatch()`.
fn __pop_Variant4<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, AnyStringKind, TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant7<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
@ -18509,16 +18517,6 @@ mod __parse__Top {
_ => __symbol_type_mismatch()
}
}
// Pops the top entry of the parser's symbol stack and unwraps it as `Variant4`
// (a `StringKind`), returning the payload together with the two `TextSize`
// values stored alongside it (presumably the symbol's start and end offsets —
// confirm against the lalrpop runtime).
// An empty stack or any other variant ends up in `__symbol_type_mismatch()`.
fn __pop_Variant4<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, StringKind, TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant4(__v), __r)) => (__l, __v, __r),
_ => __symbol_type_mismatch()
}
}
fn __pop_Variant67<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
@ -19102,7 +19100,7 @@ mod __parse__Top {
fn __pop_Variant102<
>(
__symbols: &mut alloc::vec::Vec<(TextSize,__Symbol<>,TextSize)>
) -> (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize)
) -> (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize)
{
match __symbols.pop() {
Some((__l, __Symbol::Variant102(__v), __r)) => (__l, __v, __r),
@ -35724,7 +35722,7 @@ fn __action185<
(_, parameters, _): (TextSize, core::option::Option<ast::Parameters>, TextSize),
(_, end_location_args, _): (TextSize, TextSize, TextSize),
(_, _, _): (TextSize, token::Tok, TextSize),
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
(_, fstring_middle, _): (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
(_, body, _): (TextSize, crate::parser::ParenthesizedExpr, TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
@ -36179,7 +36177,7 @@ fn __action218<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, string, _): (TextSize, (Box<str>, StringKind), TextSize),
(_, string, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -36196,7 +36194,7 @@ fn __action219<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, start, _): (TextSize, StringKind, TextSize),
(_, start, _): (TextSize, AnyStringKind, TextSize),
(_, elements, _): (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
(_, _, _): (TextSize, token::Tok, TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
@ -36230,7 +36228,7 @@ fn __action221<
source_code: &str,
mode: Mode,
(_, location, _): (TextSize, TextSize, TextSize),
(_, fstring_middle, _): (TextSize, (Box<str>, StringKind), TextSize),
(_, fstring_middle, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
(_, end_location, _): (TextSize, TextSize, TextSize),
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -37185,8 +37183,8 @@ fn __action282<
>(
source_code: &str,
mode: Mode,
(_, __0, _): (TextSize, (Box<str>, StringKind), TextSize),
) -> core::option::Option<(Box<str>, StringKind)>
(_, __0, _): (TextSize, (Box<str>, AnyStringKind), TextSize),
) -> core::option::Option<(Box<str>, AnyStringKind)>
{
Some(__0)
}
@ -37199,7 +37197,7 @@ fn __action283<
mode: Mode,
__lookbehind: &TextSize,
__lookahead: &TextSize,
) -> core::option::Option<(Box<str>, StringKind)>
) -> core::option::Option<(Box<str>, AnyStringKind)>
{
None
}
@ -47957,7 +47955,7 @@ fn __action791<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, TextSize, TextSize),
@ -48017,7 +48015,7 @@ fn __action793<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
__1: (TextSize, TextSize, TextSize),
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -49121,7 +49119,7 @@ fn __action828<
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
__2: (TextSize, TextSize, TextSize),
__3: (TextSize, token::Tok, TextSize),
__4: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__4: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__5: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
__6: (TextSize, TextSize, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
@ -52139,7 +52137,7 @@ fn __action924<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
__1: (TextSize, TextSize, TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -63911,7 +63909,7 @@ fn __action1304<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
__2: (TextSize, token::Tok, TextSize),
) -> StringType
@ -63967,7 +63965,7 @@ fn __action1306<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
) -> Result<ast::FStringElement,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
let __start0 = __0.2;
@ -64870,7 +64868,7 @@ fn __action1338<
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, core::option::Option<ast::Parameters>, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -69379,7 +69377,7 @@ fn __action1485<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, (Box<str>, StringKind), TextSize),
__0: (TextSize, (Box<str>, AnyStringKind), TextSize),
) -> Result<StringType,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
let __start0 = __0.2;
@ -72279,7 +72277,7 @@ fn __action1578<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, token::Tok, TextSize),
) -> StringType
{
@ -72307,7 +72305,7 @@ fn __action1579<
>(
source_code: &str,
mode: Mode,
__0: (TextSize, StringKind, TextSize),
__0: (TextSize, AnyStringKind, TextSize),
__1: (TextSize, alloc::vec::Vec<ast::FStringElement>, TextSize),
__2: (TextSize, token::Tok, TextSize),
) -> StringType
@ -76896,7 +76894,7 @@ fn __action1716<
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, ast::Parameters, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__3: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -76927,7 +76925,7 @@ fn __action1717<
mode: Mode,
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, token::Tok, TextSize),
__2: (TextSize, core::option::Option<(Box<str>, StringKind)>, TextSize),
__2: (TextSize, core::option::Option<(Box<str>, AnyStringKind)>, TextSize),
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -78832,7 +78830,7 @@ fn __action1774<
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, ast::Parameters, TextSize),
__2: (TextSize, token::Tok, TextSize),
__3: (TextSize, (Box<str>, StringKind), TextSize),
__3: (TextSize, (Box<str>, AnyStringKind), TextSize),
__4: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{
@ -78895,7 +78893,7 @@ fn __action1776<
mode: Mode,
__0: (TextSize, token::Tok, TextSize),
__1: (TextSize, token::Tok, TextSize),
__2: (TextSize, (Box<str>, StringKind), TextSize),
__2: (TextSize, (Box<str>, AnyStringKind), TextSize),
__3: (TextSize, crate::parser::ParenthesizedExpr, TextSize),
) -> Result<crate::parser::ParenthesizedExpr,__lalrpop_util::ParseError<TextSize,token::Tok,LexicalError>>
{

View file

@ -2,11 +2,10 @@
use bstr::ByteSlice;
use ruff_python_ast::{self as ast, Expr};
use ruff_python_ast::{self as ast, AnyStringKind, Expr};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::lexer::{LexicalError, LexicalErrorType};
use crate::string_token_flags::StringKind;
use crate::token::Tok;
pub(crate) enum StringType {
@ -43,13 +42,13 @@ enum EscapedChar {
struct StringParser {
source: Box<str>,
cursor: usize,
kind: StringKind,
kind: AnyStringKind,
offset: TextSize,
range: TextRange,
}
impl StringParser {
fn new(source: Box<str>, kind: StringKind, offset: TextSize, range: TextRange) -> Self {
fn new(source: Box<str>, kind: AnyStringKind, offset: TextSize, range: TextRange) -> Self {
Self {
source,
cursor: 0,
@ -425,7 +424,7 @@ impl StringParser {
pub(crate) fn parse_string_literal(
source: Box<str>,
kind: StringKind,
kind: AnyStringKind,
range: TextRange,
) -> Result<StringType, LexicalError> {
StringParser::new(source, kind, range.start() + kind.opener_len(), range).parse()
@ -433,7 +432,7 @@ pub(crate) fn parse_string_literal(
pub(crate) fn parse_fstring_literal_element(
source: Box<str>,
kind: StringKind,
kind: AnyStringKind,
range: TextRange,
) -> Result<ast::FStringElement, LexicalError> {
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()

View file

@ -1,395 +0,0 @@
use std::fmt;
use bitflags::bitflags;
use ruff_python_ast::{str::Quote, ByteStringPrefix, FStringPrefix, StringLiteralPrefix};
use ruff_text_size::{TextLen, TextSize};
bitflags! {
/// Flags that can be queried to obtain information
/// regarding the prefixes and quotes used for a string literal.
///
/// Note that not all of these flags can be validly combined -- e.g.,
/// it is invalid to combine the `U_PREFIX` flag with any other
/// of the `*_PREFIX` flags. As such, the recommended way to set the
/// prefix flags is by calling the `as_flags()` method on the
/// `StringPrefix` enum.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct StringFlags: u8 {
/// The string uses double quotes (`"`).
/// If this flag is not set, the string uses single quotes (`'`).
const DOUBLE = 1 << 0;
/// The string is triple-quoted:
/// it begins and ends with three consecutive quote characters.
const TRIPLE_QUOTED = 1 << 1;
/// The string has a `u` or `U` prefix.
/// While this prefix is a no-op at runtime,
/// strings with this prefix can have no other prefixes set.
const U_PREFIX = 1 << 2;
/// The string has a `b` or `B` prefix.
/// This means that the string is a sequence of `int`s at runtime,
/// rather than a sequence of `str`s.
/// Strings with this flag can also be raw strings,
/// but can have no other prefixes.
const B_PREFIX = 1 << 3;
/// The string has an `f` or `F` prefix, meaning it is an f-string.
/// F-strings can also be raw strings,
/// but can have no other prefixes.
const F_PREFIX = 1 << 4;
/// The string has an `r` prefix, meaning it is a raw string.
/// F-strings and byte-strings can be raw,
/// as can strings with no other prefixes.
/// U-strings cannot be raw.
const R_PREFIX_LOWER = 1 << 5;
/// The string has an `R` prefix, meaning it is a raw string.
/// The casing of the `r`/`R` has no semantic significance at runtime;
/// see <https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings>
/// for why we track the casing of the `r` prefix,
/// but not for any other prefix.
const R_PREFIX_UPPER = 1 << 6;
}
}
/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `StringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// For background on the prefixes themselves, see the Python reference
/// on [String and Bytes literals] and [PEP 701].
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum StringPrefix {
/// Prefixes that indicate the string is a bytestring
Bytes(ByteStringPrefix),
/// Prefixes that indicate the string is an f-string
Format(FStringPrefix),
/// All other prefixes (including the absence of any prefix)
Regular(StringLiteralPrefix),
}
impl TryFrom<char> for StringPrefix {
    type Error = String;

    /// Interprets a single character as a string prefix.
    ///
    /// Only the casing of `r`/`R` is recorded in the result; `u`, `b` and `f`
    /// map to the same variant regardless of case.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message if `value` is not one of
    /// `r`, `R`, `u`, `U`, `b`, `B`, `f` or `F`.
    fn try_from(value: char) -> Result<Self, String> {
        match value {
            'r' | 'R' => Ok(Self::Regular(StringLiteralPrefix::Raw {
                uppercase: value.is_ascii_uppercase(),
            })),
            'u' | 'U' => Ok(Self::Regular(StringLiteralPrefix::Unicode)),
            'b' | 'B' => Ok(Self::Bytes(ByteStringPrefix::Regular)),
            'f' | 'F' => Ok(Self::Format(FStringPrefix::Regular)),
            _ => Err(format!("Unexpected prefix '{value}'")),
        }
    }
}
impl TryFrom<[char; 2]> for StringPrefix {
    type Error = String;

    /// Interprets a pair of characters as a two-character string prefix.
    ///
    /// A valid pair consists of one `r`/`R` and one of `f`, `F`, `b` or `B`,
    /// in either order. Only the casing of the `r` is recorded in the result.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message for any other pair of characters.
    fn try_from(value: [char; 2]) -> Result<Self, String> {
        let [first, second] = value;

        // Work out which of the two characters is the raw marker and which
        // one selects between f-strings and bytestrings.
        let (raw_marker, companion) = match (first, second) {
            ('r' | 'R', _) => (first, second),
            (_, 'r' | 'R') => (second, first),
            _ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
        };
        let uppercase_r = raw_marker == 'R';

        match companion {
            'f' | 'F' => Ok(Self::Format(FStringPrefix::Raw { uppercase_r })),
            'b' | 'B' => Ok(Self::Bytes(ByteStringPrefix::Raw { uppercase_r })),
            _ => Err(format!("Unexpected prefix '{}{}'", value[0], value[1])),
        }
    }
}
impl StringPrefix {
    /// Translates this prefix into the matching combination of
    /// `*_PREFIX` flags. No quote-related flags are ever set here.
    const fn as_flags(self) -> StringFlags {
        match self {
            Self::Regular(literal_prefix) => match literal_prefix {
                StringLiteralPrefix::Empty => StringFlags::empty(),
                StringLiteralPrefix::Unicode => StringFlags::U_PREFIX,
                StringLiteralPrefix::Raw { uppercase: false } => StringFlags::R_PREFIX_LOWER,
                StringLiteralPrefix::Raw { uppercase: true } => StringFlags::R_PREFIX_UPPER,
            },
            Self::Bytes(bytes_prefix) => match bytes_prefix {
                ByteStringPrefix::Regular => StringFlags::B_PREFIX,
                ByteStringPrefix::Raw { uppercase_r: false } => {
                    StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_LOWER)
                }
                ByteStringPrefix::Raw { uppercase_r: true } => {
                    StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_UPPER)
                }
            },
            Self::Format(format_prefix) => match format_prefix {
                FStringPrefix::Regular => StringFlags::F_PREFIX,
                FStringPrefix::Raw { uppercase_r: false } => {
                    StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_LOWER)
                }
                FStringPrefix::Raw { uppercase_r: true } => {
                    StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_UPPER)
                }
            },
        }
    }

    /// Recovers the prefix encoded in the flags of `kind`.
    ///
    /// The `f` flag is checked first, then the `b` flag; within each group
    /// (and for plain strings) the lowercase-`r` flag is checked before the
    /// uppercase-`R` flag. The `u` flag is only considered when none of the
    /// other prefix flags are set.
    const fn from_kind(kind: StringKind) -> Self {
        let StringKind(flags) = kind;
        let lower_r = flags.contains(StringFlags::R_PREFIX_LOWER);
        let upper_r = flags.contains(StringFlags::R_PREFIX_UPPER);

        if flags.contains(StringFlags::F_PREFIX) {
            // f-strings
            Self::Format(if lower_r {
                FStringPrefix::Raw { uppercase_r: false }
            } else if upper_r {
                FStringPrefix::Raw { uppercase_r: true }
            } else {
                FStringPrefix::Regular
            })
        } else if flags.contains(StringFlags::B_PREFIX) {
            // bytestrings
            Self::Bytes(if lower_r {
                ByteStringPrefix::Raw { uppercase_r: false }
            } else if upper_r {
                ByteStringPrefix::Raw { uppercase_r: true }
            } else {
                ByteStringPrefix::Regular
            })
        } else {
            // all other strings
            Self::Regular(if lower_r {
                StringLiteralPrefix::Raw { uppercase: false }
            } else if upper_r {
                StringLiteralPrefix::Raw { uppercase: true }
            } else if flags.contains(StringFlags::U_PREFIX) {
                StringLiteralPrefix::Unicode
            } else {
                StringLiteralPrefix::Empty
            })
        }
    }

    /// The textual form of this prefix, as provided by the wrapped prefix type.
    const fn as_str(self) -> &'static str {
        match self {
            Self::Bytes(inner) => inner.as_str(),
            Self::Format(inner) => inner.as_str(),
            Self::Regular(inner) => inner.as_str(),
        }
    }
}
impl fmt::Display for StringPrefix {
    /// Writes the prefix text (see `as_str()`) verbatim to the formatter.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.as_str();
        formatter.write_str(text)
    }
}
impl Default for StringPrefix {
fn default() -> Self {
Self::Regular(StringLiteralPrefix::Empty)
}
}
/// Compact description of a string token's opener: which prefix it carries,
/// which quote character it uses, and whether it is triple-quoted.
///
/// This is a thin wrapper around the private `StringFlags` bitflags; use the
/// query methods on this type rather than inspecting the flags directly.
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringKind(StringFlags);
impl StringKind {
pub(crate) const fn from_prefix(prefix: StringPrefix) -> Self {
Self(prefix.as_flags())
}
pub const fn prefix(self) -> StringPrefix {
StringPrefix::from_kind(self)
}
/// Does the string have a `u` or `U` prefix?
pub const fn is_u_string(self) -> bool {
self.0.contains(StringFlags::U_PREFIX)
}
/// Does the string have an `r` or `R` prefix?
pub const fn is_raw_string(self) -> bool {
self.0
.intersects(StringFlags::R_PREFIX_LOWER.union(StringFlags::R_PREFIX_UPPER))
}
/// Does the string have an `f` or `F` prefix?
pub const fn is_f_string(self) -> bool {
self.0.contains(StringFlags::F_PREFIX)
}
/// Does the string have a `b` or `B` prefix?
pub const fn is_byte_string(self) -> bool {
self.0.contains(StringFlags::B_PREFIX)
}
/// Does the string use single or double quotes in its opener and closer?
pub const fn quote_style(self) -> Quote {
if self.0.contains(StringFlags::DOUBLE) {
Quote::Double
} else {
Quote::Single
}
}
/// Is the string triple-quoted, i.e.,
/// does it begin and end with three consecutive quote characters?
pub const fn is_triple_quoted(self) -> bool {
self.0.contains(StringFlags::TRIPLE_QUOTED)
}
/// A `str` representation of the quotes used to start and close.
/// This does not include any prefixes the string has in its opener.
pub const fn quote_str(self) -> &'static str {
if self.is_triple_quoted() {
match self.quote_style() {
Quote::Single => "'''",
Quote::Double => r#"""""#,
}
} else {
match self.quote_style() {
Quote::Single => "'",
Quote::Double => "\"",
}
}
}
/// The length of the prefixes used (if any) in the string's opener.
pub fn prefix_len(self) -> TextSize {
self.prefix().as_str().text_len()
}
/// The length of the quotes used to start and close the string.
/// This does not include the length of any prefixes the string has
/// in its opener.
pub const fn quote_len(self) -> TextSize {
if self.is_triple_quoted() {
TextSize::new(3)
} else {
TextSize::new(1)
}
}
/// The total length of the string's opener,
/// i.e., the length of the prefixes plus the length
/// of the quotes used to open the string.
pub fn opener_len(self) -> TextSize {
self.prefix_len() + self.quote_len()
}
/// The total length of the string's closer.
/// This is always equal to `self.quote_len()`,
/// but is provided here for symmetry with the `opener_len()` method.
pub const fn closer_len(self) -> TextSize {
self.quote_len()
}
pub fn format_string_contents(self, contents: &str) -> String {
format!(
"{}{}{}{}",
self.prefix(),
self.quote_str(),
contents,
self.quote_str()
)
}
#[must_use]
pub fn with_double_quotes(mut self) -> Self {
self.0 |= StringFlags::DOUBLE;
self
}
#[must_use]
pub fn with_triple_quotes(mut self) -> Self {
self.0 |= StringFlags::TRIPLE_QUOTED;
self
}
}
impl fmt::Debug for StringKind {
    /// Renders the decoded prefix, triple-quote status and quote style
    /// instead of the raw bit pattern.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("StringKind");
        builder.field("prefix", &self.prefix());
        builder.field("triple_quoted", &self.is_triple_quoted());
        builder.field("quote_style", &self.quote_style());
        builder.finish()
    }
}
impl From<StringKind> for ruff_python_ast::StringLiteralFlags {
    /// Converts a token-level kind into AST flags for a regular string literal.
    ///
    /// # Panics
    ///
    /// Panics if `value` carries a bytestring or f-string prefix.
    fn from(value: StringKind) -> ruff_python_ast::StringLiteralFlags {
        let base = ruff_python_ast::StringLiteralFlags::default();
        let quoted = if value.quote_style().is_double() {
            base.with_double_quotes()
        } else {
            base
        };
        let flags = if value.is_triple_quoted() {
            quoted.with_triple_quotes()
        } else {
            quoted
        };

        match value.prefix() {
            StringPrefix::Regular(prefix) => flags.with_prefix(prefix),
            StringPrefix::Bytes(_) | StringPrefix::Format(_) => unreachable!(
                "Should never attempt to convert {} into a regular string",
                value.prefix()
            ),
        }
    }
}
impl From<StringKind> for ruff_python_ast::BytesLiteralFlags {
    /// Converts a token-level kind into AST flags for a bytes literal.
    ///
    /// # Panics
    ///
    /// Panics if `value` does not carry a bytestring prefix.
    fn from(value: StringKind) -> ruff_python_ast::BytesLiteralFlags {
        let base = ruff_python_ast::BytesLiteralFlags::default();
        let quoted = if value.quote_style().is_double() {
            base.with_double_quotes()
        } else {
            base
        };
        let flags = if value.is_triple_quoted() {
            quoted.with_triple_quotes()
        } else {
            quoted
        };

        match value.prefix() {
            StringPrefix::Bytes(bytestring_prefix) => flags.with_prefix(bytestring_prefix),
            StringPrefix::Regular(_) | StringPrefix::Format(_) => unreachable!(
                "Should never attempt to convert {} into a bytestring",
                value.prefix()
            ),
        }
    }
}
impl From<StringKind> for ruff_python_ast::FStringFlags {
    /// Converts a token-level kind into AST flags for an f-string.
    ///
    /// # Panics
    ///
    /// Panics if `value` does not carry an f-string prefix.
    fn from(value: StringKind) -> ruff_python_ast::FStringFlags {
        let base = ruff_python_ast::FStringFlags::default();
        let quoted = if value.quote_style().is_double() {
            base.with_double_quotes()
        } else {
            base
        };
        let flags = if value.is_triple_quoted() {
            quoted.with_triple_quotes()
        } else {
            quoted
        };

        match value.prefix() {
            StringPrefix::Format(fstring_prefix) => flags.with_prefix(fstring_prefix),
            StringPrefix::Regular(_) | StringPrefix::Bytes(_) => unreachable!(
                "Should never attempt to convert {} into an f-string",
                value.prefix()
            ),
        }
    }
}

View file

@ -4,10 +4,9 @@
//! loosely based on the token definitions found in the [CPython source].
//!
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h;
use crate::string_token_flags::StringKind;
use crate::Mode;
use ruff_python_ast::{Int, IpyEscapeKind};
use ruff_python_ast::{AnyStringKind, Int, IpyEscapeKind};
use std::fmt;
/// The set of tokens the Python source code can be tokenized in.
@ -44,11 +43,11 @@ pub enum Tok {
value: Box<str>,
/// Flags that can be queried to determine the quote style
/// and prefixes of the string
kind: StringKind,
kind: AnyStringKind,
},
/// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix
/// and the opening quote(s).
FStringStart(StringKind),
FStringStart(AnyStringKind),
/// Token value that includes the portion of text inside the f-string that's not
/// part of the expression part and isn't an opening or closing brace.
FStringMiddle {
@ -56,7 +55,7 @@ pub enum Tok {
value: Box<str>,
/// Flags that can be queried to determine the quote style
/// and prefixes of the string
kind: StringKind,
kind: AnyStringKind,
},
/// Token value for the end of an f-string. This includes the closing quote.
FStringEnd,