Move string-prefix enumerations to a separate submodule (#11425)

## Summary

This moves the string-prefix enumerations in `ruff_python_ast` to a
separate submodule. I think this helps clarify that these prefixes are
purely abstract: they only depend on each other, and do not depend on
any of the other code in `nodes.rs` in any way. Moreover, while various
AST nodes _use_ them, they're not really nodes themselves, so they feel
slightly out of place in `nodes.rs`.

I considered moving all of them to `str.rs`, but it felt like enough
code that it could be a separate submodule.

## Test Plan

`cargo test`
This commit is contained in:
Alex Waygood 2024-05-15 07:40:27 -04:00 committed by GitHub
parent effe3ad4ef
commit 6963f75a14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 290 additions and 274 deletions

View file

@ -0,0 +1,202 @@
use std::fmt;
/// Enumeration of the valid prefixes a string literal can have.
///
/// Bytestrings and f-strings are excluded from this enumeration,
/// as they are represented by different AST nodes.
// NOTE(review): the `is_macro::Is` derive presumably generates the `is_raw()`
// predicate that `AnyStringPrefix::is_raw` calls on this type — confirm.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, is_macro::Is)]
pub enum StringLiteralPrefix {
/// Just a regular string with no prefix at all, e.g. `"foo"`.
Empty,
/// A string with a `u` or `U` prefix, e.g. `u"foo"`.
///
/// Note that, despite this variant's name,
/// it is in fact a no-op at runtime to use the `u` or `U` prefix
/// in Python. All Python-3 strings are unicode strings;
/// this prefix is only allowed in Python 3 for backwards compatibility
/// with Python 2. However, using this prefix in a Python string
/// is mutually exclusive with an `r` or `R` prefix.
///
/// The case of the prefix is not recorded by this variant:
/// `as_str` always renders it as a lowercase `u`.
Unicode,
/// A "raw" string, that has an `r` or `R` prefix,
/// e.g. `r"foo\."` or `R'bar\d'`.
///
/// `uppercase` is `true` for an `R` prefix and `false` for an `r` prefix.
Raw { uppercase: bool },
}
impl StringLiteralPrefix {
    /// Return the prefix as it would be written in front of the opening quote.
    ///
    /// An absent prefix is rendered as the empty string, a unicode prefix is
    /// always rendered as a lowercase `u`, and a raw prefix keeps the case
    /// recorded in its `uppercase` field.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Raw { uppercase } => {
                if uppercase {
                    "R"
                } else {
                    "r"
                }
            }
            Self::Unicode => "u",
            Self::Empty => "",
        }
    }
}
impl fmt::Display for StringLiteralPrefix {
    /// Write the prefix exactly as `as_str` renders it.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.as_str();
        formatter.write_str(rendered)
    }
}
/// Enumeration of the valid prefixes an f-string literal can have.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum FStringPrefix {
    /// A plain f-string with no additional prefix, e.g. `f"{bar}"`.
    Regular,

    /// A "raw" f-string, which additionally carries an `r` or `R` prefix,
    /// e.g. `rf"{bar}"` or `Rf"{bar}"`.
    ///
    /// `uppercase_r` is `true` when the raw marker is an uppercase `R`.
    Raw { uppercase_r: bool },
}

impl FStringPrefix {
    /// Return the prefix as a static string slice.
    ///
    /// The `f` is always rendered in lowercase and placed after the raw
    /// marker; only the case of the raw marker is preserved.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Raw { uppercase_r } => {
                if uppercase_r {
                    "Rf"
                } else {
                    "rf"
                }
            }
            Self::Regular => "f",
        }
    }

    /// Return `true` for a raw f-string prefix such as `rf` or `Rf`,
    /// and `false` for a plain `f` prefix.
    pub const fn is_raw(self) -> bool {
        match self {
            Self::Raw { .. } => true,
            Self::Regular => false,
        }
    }
}

impl fmt::Display for FStringPrefix {
    /// Write the prefix exactly as `as_str` renders it.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.as_str();
        formatter.write_str(rendered)
    }
}
/// Enumeration of the valid prefixes a bytestring literal can have.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum ByteStringPrefix {
    /// A plain bytestring with no additional prefix, e.g. `b"foo"`.
    Regular,

    /// A "raw" bytestring, which additionally carries an `r` or `R` prefix,
    /// e.g. `Rb"foo"` or `rb"foo"`.
    ///
    /// `uppercase_r` is `true` when the raw marker is an uppercase `R`.
    Raw { uppercase_r: bool },
}

impl ByteStringPrefix {
    /// Return the prefix as a static string slice.
    ///
    /// The `b` is always rendered in lowercase and placed after the raw
    /// marker; only the case of the raw marker is preserved.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Raw { uppercase_r } => {
                if uppercase_r {
                    "Rb"
                } else {
                    "rb"
                }
            }
            Self::Regular => "b",
        }
    }

    /// Return `true` for a raw bytestring prefix such as `rb` or `Rb`,
    /// and `false` for a plain `b` prefix.
    pub const fn is_raw(self) -> bool {
        match self {
            Self::Raw { .. } => true,
            Self::Regular => false,
        }
    }
}

impl fmt::Display for ByteStringPrefix {
    /// Write the prefix exactly as `as_str` renders it.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.as_str();
        formatter.write_str(rendered)
    }
}
/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Each variant wraps the dedicated prefix enumeration for one kind of
/// literal: bytestrings, f-strings, and all remaining string literals.
/// For the rules governing these prefixes in Python itself, see
/// [String and Bytes literals] and [PEP 701].
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `StringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
// NOTE(review): neither `as_flags()` nor `StringFlags` is defined in this
// module — confirm the paragraph above still describes an existing API.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, is_macro::Is)]
pub enum AnyStringPrefix {
/// Prefixes that indicate the string is a bytestring
/// (see `ByteStringPrefix`).
Bytes(ByteStringPrefix),
/// Prefixes that indicate the string is an f-string
/// (see `FStringPrefix`).
Format(FStringPrefix),
/// All other prefixes, including the absence of any prefix
/// (see `StringLiteralPrefix`).
Regular(StringLiteralPrefix),
}
impl AnyStringPrefix {
    /// Return the textual form of the wrapped prefix.
    ///
    /// This forwards to the `as_str` method of whichever prefix
    /// enumeration the variant holds.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Bytes(prefix) => prefix.as_str(),
            Self::Format(prefix) => prefix.as_str(),
            Self::Regular(prefix) => prefix.as_str(),
        }
    }

    /// Return `true` if the wrapped prefix reports itself as raw.
    ///
    /// This forwards to the `is_raw` method of whichever prefix
    /// enumeration the variant holds.
    pub const fn is_raw(self) -> bool {
        match self {
            Self::Bytes(prefix) => prefix.is_raw(),
            Self::Format(prefix) => prefix.is_raw(),
            Self::Regular(prefix) => prefix.is_raw(),
        }
    }
}
impl TryFrom<char> for AnyStringPrefix {
    type Error = String;

    /// Interpret a single character as a string prefix.
    ///
    /// `r`/`R` map to a raw string-literal prefix (the case is preserved),
    /// `u`/`U` to the unicode prefix, `b`/`B` to a plain bytestring prefix
    /// and `f`/`F` to a plain f-string prefix.
    ///
    /// # Errors
    ///
    /// Returns a message naming the offending character if it is none of
    /// the above.
    fn try_from(value: char) -> Result<Self, Self::Error> {
        match value {
            'r' | 'R' => Ok(Self::Regular(StringLiteralPrefix::Raw {
                uppercase: value == 'R',
            })),
            'u' | 'U' => Ok(Self::Regular(StringLiteralPrefix::Unicode)),
            'b' | 'B' => Ok(Self::Bytes(ByteStringPrefix::Regular)),
            'f' | 'F' => Ok(Self::Format(FStringPrefix::Regular)),
            _ => Err(format!("Unexpected prefix '{value}'")),
        }
    }
}
impl TryFrom<[char; 2]> for AnyStringPrefix {
    type Error = String;

    /// Interpret a two-character sequence as a raw f-string or raw
    /// bytestring prefix.
    ///
    /// Exactly one of the two characters must be the raw marker (`r` or
    /// `R`, whose case is preserved); the other must be `f`/`F` or `b`/`B`.
    /// The two characters may appear in either order.
    ///
    /// # Errors
    ///
    /// Returns a message naming both characters if they do not form one of
    /// the combinations described above.
    fn try_from(value: [char; 2]) -> Result<Self, Self::Error> {
        let classified = match value {
            // Locate the raw marker in either position; the remaining
            // character decides between f-string and bytestring.
            [raw @ ('r' | 'R'), kind] | [kind, raw @ ('r' | 'R')] => {
                let uppercase_r = raw == 'R';
                match kind {
                    'f' | 'F' => Some(Self::Format(FStringPrefix::Raw { uppercase_r })),
                    'b' | 'B' => Some(Self::Bytes(ByteStringPrefix::Raw { uppercase_r })),
                    _ => None,
                }
            }
            _ => None,
        };
        classified.ok_or_else(|| format!("Unexpected prefix '{}{}'", value[0], value[1]))
    }
}
impl fmt::Display for AnyStringPrefix {
    /// Write the prefix exactly as `as_str` renders it.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.as_str();
        formatter.write_str(rendered)
    }
}
impl Default for AnyStringPrefix {
    /// The default is the absence of any prefix: a regular string literal
    /// with `StringLiteralPrefix::Empty`.
    fn default() -> Self {
        let no_prefix = StringLiteralPrefix::Empty;
        Self::Regular(no_prefix)
    }
}