Track quoting style in the tokenizer (#10256)

This commit is contained in:
Alex Waygood 2024-03-08 08:40:06 +00:00 committed by GitHub
parent 72c9f7e4c9
commit c504d7ab11
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 4595 additions and 3800 deletions

View file

@ -12,7 +12,7 @@
//! # Example
//!
//! ```
//! use ruff_python_parser::{lexer::lex, Tok, Mode, StringKind};
//! use ruff_python_parser::{lexer::lex, Tok, Mode};
//!
//! let source = "x = 'RustPython'";
//! let tokens = lex(source, Mode::Module)
@ -37,12 +37,13 @@ use ruff_python_ast::{Int, IpyEscapeKind};
use ruff_text_size::{TextLen, TextRange, TextSize};
use crate::lexer::cursor::{Cursor, EOF_CHAR};
use crate::lexer::fstring::{FStringContext, FStringContextFlags, FStrings};
use crate::lexer::fstring::{FStringContext, FStrings};
use crate::lexer::indentation::{Indentation, Indentations};
use crate::{
soft_keywords::SoftKeywordTransformer,
string::FStringErrorType,
token::{StringKind, Tok},
string_token_flags::{StringKind, StringPrefix},
token::Tok,
Mode,
};
@ -181,16 +182,16 @@ impl<'source> Lexer<'source> {
return Ok(self.lex_fstring_start(quote, true));
}
(_, quote @ ('\'' | '"')) => {
if let Ok(string_kind) = StringKind::try_from(first) {
if let Ok(prefix) = StringPrefix::try_from(first) {
self.cursor.bump();
return self.lex_string(string_kind, quote);
return self.lex_string(Some(prefix), quote);
}
}
(_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
self.cursor.bump();
if let Ok(string_kind) = StringKind::try_from([first, second]) {
if let Ok(prefix) = StringPrefix::try_from([first, second]) {
let quote = self.cursor.bump().unwrap();
return self.lex_string(string_kind, quote);
return self.lex_string(Some(prefix), quote);
}
}
_ => {}
@ -538,19 +539,21 @@ impl<'source> Lexer<'source> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut flags = FStringContextFlags::empty();
let mut kind = StringKind::from_prefix(Some(if is_raw_string {
StringPrefix::RawFormat
} else {
StringPrefix::Format
}));
if quote == '"' {
flags |= FStringContextFlags::DOUBLE;
}
if is_raw_string {
flags |= FStringContextFlags::RAW;
kind = kind.with_double_quotes();
}
if self.cursor.eat_char2(quote, quote) {
flags |= FStringContextFlags::TRIPLE;
kind = kind.with_triple_quotes();
}
self.fstrings.push(FStringContext::new(flags, self.nesting));
Tok::FStringStart
self.fstrings.push(FStringContext::new(kind, self.nesting));
Tok::FStringStart(kind)
}
/// Lex an f-string middle or end token.
@ -683,24 +686,35 @@ impl<'source> Lexer<'source> {
};
Ok(Some(Tok::FStringMiddle {
value: value.into_boxed_str(),
is_raw: fstring.is_raw_string(),
triple_quoted: fstring.is_triple_quoted(),
kind: fstring.kind(),
}))
}
/// Lex a string literal.
fn lex_string(&mut self, kind: StringKind, quote: char) -> Result<Tok, LexicalError> {
fn lex_string(
&mut self,
prefix: Option<StringPrefix>,
quote: char,
) -> Result<Tok, LexicalError> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut kind = StringKind::from_prefix(prefix);
if quote == '"' {
kind = kind.with_double_quotes();
}
// If the next two characters are also the quote character, then we have a triple-quoted
// string; consume those two characters and ensure that we require a triple-quote to close
let triple_quoted = self.cursor.eat_char2(quote, quote);
if self.cursor.eat_char2(quote, quote) {
kind = kind.with_triple_quotes();
}
let value_start = self.offset();
let quote_byte = u8::try_from(quote).expect("char that fits in u8");
let value_end = if triple_quoted {
let value_end = if kind.is_triple_quoted() {
// For triple-quoted strings, scan until we find the closing quote (ignoring escaped
// quotes) or the end of the file.
loop {
@ -712,7 +726,7 @@ impl<'source> Lexer<'source> {
// matches with f-strings quotes and if it is, then this must be a
// missing '}' token so raise the proper error.
if fstring.quote_char() == quote
&& fstring.is_triple_quoted() == triple_quoted
&& fstring.is_triple_quoted() == kind.is_triple_quoted()
{
return Err(LexicalError::new(
LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
@ -761,7 +775,7 @@ impl<'source> Lexer<'source> {
// matches with f-strings quotes and if it is, then this must be a
// missing '}' token so raise the proper error.
if fstring.quote_char() == quote
&& fstring.is_triple_quoted() == triple_quoted
&& fstring.is_triple_quoted() == kind.is_triple_quoted()
{
return Err(LexicalError::new(
LexicalErrorType::FStringError(FStringErrorType::UnclosedLbrace),
@ -832,7 +846,6 @@ impl<'source> Lexer<'source> {
.to_string()
.into_boxed_str(),
kind,
triple_quoted,
})
}
@ -843,7 +856,7 @@ impl<'source> Lexer<'source> {
if !fstring.is_in_expression(self.nesting) {
match self.lex_fstring_middle_or_end() {
Ok(Some(tok)) => {
if tok == Tok::FStringEnd {
if tok.is_f_string_end() {
self.fstrings.pop();
}
return Ok((tok, self.token_range()));
@ -1056,7 +1069,7 @@ impl<'source> Lexer<'source> {
c if is_ascii_identifier_start(c) => self.lex_identifier(c)?,
'0'..='9' => self.lex_number(c)?,
'#' => return Ok((self.lex_comment(), self.token_range())),
'"' | '\'' => self.lex_string(StringKind::String, c)?,
'\'' | '"' => self.lex_string(None, c)?,
'=' => {
if self.cursor.eat_char('=') {
Tok::EqEqual

View file

@ -1,27 +1,9 @@
use bitflags::bitflags;
bitflags! {
#[derive(Debug)]
pub(crate) struct FStringContextFlags: u8 {
/// The current f-string is a triple-quoted f-string i.e., the number of
/// opening quotes is 3. If this flag is not set, the number of opening
/// quotes is 1.
const TRIPLE = 1 << 0;
/// The current f-string is a double-quoted f-string. If this flag is not
/// set, the current f-string is a single-quoted f-string.
const DOUBLE = 1 << 1;
/// The current f-string is a raw f-string i.e., prefixed with `r`/`R`.
/// If this flag is not set, the current f-string is a normal f-string.
const RAW = 1 << 2;
}
}
use crate::string_token_flags::StringKind;
/// The context representing the current f-string that the lexer is in.
#[derive(Debug)]
pub(crate) struct FStringContext {
flags: FStringContextFlags,
kind: StringKind,
/// The level of nesting for the lexer when it entered the current f-string.
/// The nesting level includes all kinds of parentheses i.e., round, square,
@ -35,49 +17,47 @@ pub(crate) struct FStringContext {
}
impl FStringContext {
pub(crate) const fn new(flags: FStringContextFlags, nesting: u32) -> Self {
pub(crate) const fn new(kind: StringKind, nesting: u32) -> Self {
debug_assert!(kind.is_f_string());
Self {
flags,
kind,
nesting,
format_spec_depth: 0,
}
}
pub(crate) const fn kind(&self) -> StringKind {
debug_assert!(self.kind.is_f_string());
self.kind
}
pub(crate) const fn nesting(&self) -> u32 {
self.nesting
}
/// Returns the quote character for the current f-string.
pub(crate) const fn quote_char(&self) -> char {
if self.flags.contains(FStringContextFlags::DOUBLE) {
'"'
} else {
'\''
}
self.kind.quote_style().as_char()
}
/// Returns the triple quotes for the current f-string if it is a triple-quoted
/// f-string, `None` otherwise.
pub(crate) const fn triple_quotes(&self) -> Option<&'static str> {
if self.is_triple_quoted() {
if self.flags.contains(FStringContextFlags::DOUBLE) {
Some(r#"""""#)
} else {
Some("'''")
}
Some(self.kind.quote_str())
} else {
None
}
}
/// Returns `true` if the current f-string is a raw f-string.
pub(crate) const fn is_raw_string(&self) -> bool {
self.flags.contains(FStringContextFlags::RAW)
pub(crate) fn is_raw_string(&self) -> bool {
self.kind.is_raw_string()
}
/// Returns `true` if the current f-string is a triple-quoted f-string.
pub(crate) const fn is_triple_quoted(&self) -> bool {
self.flags.contains(FStringContextFlags::TRIPLE)
self.kind.is_triple_quoted()
}
/// Calculates the number of open parentheses for the current f-string

View file

@ -115,7 +115,8 @@ pub use parser::{
};
use ruff_python_ast::{Mod, PySourceType, Suite};
pub use string::FStringErrorType;
pub use token::{StringKind, Tok, TokenKind};
pub use string_token_flags::{QuoteStyle, StringKind};
pub use token::{Tok, TokenKind};
use crate::lexer::LexResult;
@ -127,6 +128,7 @@ pub mod lexer;
mod parser;
mod soft_keywords;
mod string;
mod string_token_flags;
mod token;
mod token_source;
pub mod typing;

View file

@ -12,7 +12,8 @@ use crate::{
function::{ArgumentList, parse_arguments, validate_pos_params, validate_arguments},
context::set_context,
string::{StringType, concatenated_strings, parse_fstring_literal_element, parse_string_literal},
token::{self, StringKind},
string_token_flags::StringKind,
token,
invalid,
};
use lalrpop_util::ParseError;
@ -1619,8 +1620,8 @@ StringLiteralOrFString: StringType = {
StringLiteral: StringType = {
<location:@L> <string:string> <end_location:@R> =>? {
let (source, kind, triple_quoted) = string;
Ok(parse_string_literal(source, kind, triple_quoted, (location..end_location).into())?)
let (source, kind) = string;
Ok(parse_string_literal(source, kind, (location..end_location).into())?)
}
};
@ -1636,8 +1637,8 @@ FStringExpr: StringType = {
FStringMiddlePattern: ast::FStringElement = {
FStringReplacementField,
<location:@L> <fstring_middle:fstring_middle> <end_location:@R> =>? {
let (source, is_raw, _) = fstring_middle;
Ok(parse_fstring_literal_element(source, is_raw, (location..end_location).into())?)
let (source, kind) = fstring_middle;
Ok(parse_fstring_literal_element(source, kind, (location..end_location).into())?)
}
};
@ -2001,7 +2002,7 @@ extern {
Dedent => token::Tok::Dedent,
StartModule => token::Tok::StartModule,
StartExpression => token::Tok::StartExpression,
FStringStart => token::Tok::FStringStart,
FStringStart => token::Tok::FStringStart(StringKind),
FStringEnd => token::Tok::FStringEnd,
"!" => token::Tok::Exclamation,
"?" => token::Tok::Question,
@ -2095,12 +2096,10 @@ extern {
string => token::Tok::String {
value: <Box<str>>,
kind: <StringKind>,
triple_quoted: <bool>
},
fstring_middle => token::Tok::FStringMiddle {
value: <Box<str>>,
is_raw: <bool>,
triple_quoted: <bool>
kind: <StringKind>,
},
name => token::Tok::Name { name: <Box<str>> },
ipy_escape_command => token::Tok::IpyEscapeCommand {

File diff suppressed because it is too large Load diff

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -14,13 +20,22 @@ expression: lex_source(source)
(
String {
value: "",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
4..6,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
7..9,
),
(
@ -28,7 +43,13 @@ expression: lex_source(source)
9..10,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
11..13,
),
(
@ -38,13 +59,22 @@ expression: lex_source(source)
(
String {
value: "",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
15..17,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
),
18..22,
),
(
@ -52,7 +82,13 @@ expression: lex_source(source)
22..25,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
),
26..30,
),
(

View file

@ -6,8 +6,11 @@ expression: lex_source(source)
(
String {
value: "\\N{EN SPACE}",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
0..14,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
FStringMiddle {
value: "normal ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
2..9,
),
@ -32,8 +41,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " {another} ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
14..27,
),
@ -54,8 +66,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " {",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
32..35,
),
@ -76,8 +91,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
42..44,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
),
0..4,
),
(
FStringMiddle {
value: "\n# not a comment ",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
},
4..21,
),
@ -46,8 +55,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " # not a comment\n",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
},
42..59,
),

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -34,8 +40,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
7..8,
),
@ -70,8 +79,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
14..15,
),
@ -92,8 +104,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: ".3f!r",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
18..23,
),
@ -104,8 +119,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " {x!r}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
24..32,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
FStringMiddle {
value: "\\",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
2..3,
),
@ -32,8 +41,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "\\\"\\",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
6..9,
),
@ -58,8 +70,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " \\\"\\\"\\\n end",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
13..24,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
0..2,
),
(
FStringMiddle {
value: "\\",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
2..3,
),
@ -34,14 +43,23 @@ expression: lex_source(source)
8..9,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
10..12,
),
(
FStringMiddle {
value: "\\\\",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
12..14,
),
@ -64,14 +82,23 @@ expression: lex_source(source)
19..20,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
21..23,
),
(
FStringMiddle {
value: "\\{foo}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
23..31,
),
@ -80,14 +107,23 @@ expression: lex_source(source)
31..32,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
33..35,
),
(
FStringMiddle {
value: "\\\\{foo}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
35..44,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
0..3,
),
(
FStringMiddle {
value: "\\",
is_raw: true,
triple_quoted: false,
kind: StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
},
3..4,
),
@ -32,8 +41,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "\\\"\\",
is_raw: true,
triple_quoted: false,
kind: StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
},
7..10,
),
@ -58,8 +70,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " \\\"\\\"\\\n end",
is_raw: true,
triple_quoted: false,
kind: StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
},
14..25,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
FStringMiddle {
value: "first ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
2..8,
),
@ -58,8 +67,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " second",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
41..48,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
),
0..4,
),
(
FStringMiddle {
value: "\nhello\n world\n",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
},
4..21,
),
@ -20,14 +29,23 @@ expression: lex_source(source)
21..24,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
),
25..29,
),
(
FStringMiddle {
value: "\n world\nhello\n",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
29..46,
),
@ -36,14 +54,23 @@ expression: lex_source(source)
46..49,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
50..52,
),
(
FStringMiddle {
value: "some ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
52..57,
),
@ -52,14 +79,23 @@ expression: lex_source(source)
57..58,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
),
58..62,
),
(
FStringMiddle {
value: "multiline\nallowed ",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Double,
},
},
62..80,
),
@ -88,8 +124,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " string",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
87..94,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
FStringMiddle {
value: "\\N{BULLET} normal \\Nope \\N",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
2..28,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
0..3,
),
(
FStringMiddle {
value: "\\N",
is_raw: true,
triple_quoted: false,
kind: StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
},
3..5,
),
@ -32,8 +41,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " normal",
is_raw: true,
triple_quoted: false,
kind: StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
},
13..20,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
FStringMiddle {
value: "foo ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
2..6,
),
@ -20,14 +29,23 @@ expression: lex_source(source)
6..7,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
7..9,
),
(
FStringMiddle {
value: "bar ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
9..13,
),
@ -46,7 +64,13 @@ expression: lex_source(source)
16..17,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
18..20,
),
(
@ -82,8 +106,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " baz",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
29..33,
),
@ -92,14 +119,23 @@ expression: lex_source(source)
33..34,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
35..37,
),
(
FStringMiddle {
value: "foo ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
37..41,
),
@ -108,14 +144,23 @@ expression: lex_source(source)
41..42,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
42..44,
),
(
FStringMiddle {
value: "bar",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
44..47,
),
@ -130,8 +175,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " some ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
49..55,
),
@ -140,14 +188,23 @@ expression: lex_source(source)
55..56,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
56..58,
),
(
FStringMiddle {
value: "another",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
58..65,
),

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -20,14 +26,23 @@ expression: lex_source(source)
4..5,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
6..8,
),
(
FStringMiddle {
value: "{}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
8..12,
),
@ -36,14 +51,23 @@ expression: lex_source(source)
12..13,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
14..16,
),
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
16..17,
),
@ -60,14 +84,23 @@ expression: lex_source(source)
19..20,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
21..23,
),
(
FStringMiddle {
value: "{",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
23..25,
),
@ -82,8 +115,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
27..29,
),
@ -92,14 +128,23 @@ expression: lex_source(source)
29..30,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
31..33,
),
(
FStringMiddle {
value: "{{}}",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
33..41,
),
@ -108,14 +153,23 @@ expression: lex_source(source)
41..42,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
43..45,
),
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
45..46,
),
@ -130,8 +184,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " {} {",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
48..56,
),
@ -146,8 +203,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "} {{}} ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
58..71,
),

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -12,7 +18,13 @@ expression: lex_source(source)
2..3,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
4..6,
),
(
@ -20,7 +32,13 @@ expression: lex_source(source)
6..7,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
8..11,
),
(
@ -28,7 +46,13 @@ expression: lex_source(source)
11..12,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
13..16,
),
(
@ -36,7 +60,13 @@ expression: lex_source(source)
16..17,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
18..21,
),
(
@ -44,7 +74,13 @@ expression: lex_source(source)
21..22,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
23..26,
),
(
@ -52,7 +88,13 @@ expression: lex_source(source)
26..27,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
28..31,
),
(
@ -60,7 +102,13 @@ expression: lex_source(source)
31..32,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
33..36,
),
(
@ -68,7 +116,13 @@ expression: lex_source(source)
36..37,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
38..41,
),
(
@ -76,7 +130,13 @@ expression: lex_source(source)
41..42,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "rf",
triple_quoted: false,
quote_style: Double,
},
),
43..46,
),
(

View file

@ -4,14 +4,23 @@ expression: fstring_single_quote_escape_eol(MAC_EOL)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
0..2,
),
(
FStringMiddle {
value: "text \\\r more text",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
2..19,
),

View file

@ -4,14 +4,23 @@ expression: fstring_single_quote_escape_eol(UNIX_EOL)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
0..2,
),
(
FStringMiddle {
value: "text \\\n more text",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
2..19,
),

View file

@ -4,14 +4,23 @@ expression: fstring_single_quote_escape_eol(WINDOWS_EOL)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
0..2,
),
(
FStringMiddle {
value: "text \\\r\n more text",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
2..20,
),

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -28,8 +34,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
8..9,
),
@ -64,8 +73,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: ".3f",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
15..18,
),
@ -76,8 +88,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
19..20,
),
@ -98,8 +113,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: ".",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
23..24,
),
@ -120,8 +138,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "f",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
27..28,
),
@ -132,8 +153,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
29..30,
),
@ -144,8 +168,11 @@ expression: lex_source(source)
(
String {
value: "",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
31..33,
),
@ -156,8 +183,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "*^",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
34..36,
),
@ -200,8 +230,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
44..45,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
FStringMiddle {
value: "foo ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
2..6,
),
@ -36,8 +45,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " bar",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
12..16,
),

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -52,7 +58,13 @@ expression: lex_source(source)
17..18,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
18..20,
),
(

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
),
0..4,
),
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
4..6,
),
@ -36,8 +45,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "d\n",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
14..16,
),
@ -48,8 +60,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
17..19,
),
@ -62,14 +77,23 @@ expression: lex_source(source)
22..23,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
),
23..27,
),
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
27..29,
),
@ -94,8 +118,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "a\n b\n c\n",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
37..61,
),
@ -106,8 +133,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: true,
kind: StringKind {
prefix: "f",
triple_quoted: true,
quote_style: Single,
},
},
62..64,
),
@ -120,14 +150,23 @@ expression: lex_source(source)
67..68,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
68..70,
),
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
70..72,
),
@ -152,8 +191,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "d",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
80..81,
),
@ -168,8 +210,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
83..85,
),
@ -182,14 +227,23 @@ expression: lex_source(source)
86..87,
),
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
87..89,
),
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
89..91,
),
@ -214,8 +268,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "a",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
99..100,
),
@ -240,8 +297,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "__",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
112..114,
),

View file

@ -4,7 +4,13 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
),
0..2,
),
(
@ -24,8 +30,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: "=10",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
5..8,
),
@ -36,8 +45,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
9..10,
),
@ -76,8 +88,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
19..20,
),
@ -126,8 +141,11 @@ expression: lex_source(source)
(
FStringMiddle {
value: " ",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Double,
},
},
31..32,
),

View file

@ -4,14 +4,23 @@ expression: lex_source(source)
---
[
(
FStringStart,
FStringStart(
StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
),
0..2,
),
(
FStringMiddle {
value: "\\0",
is_raw: false,
triple_quoted: false,
kind: StringKind {
prefix: "f",
triple_quoted: false,
quote_style: Single,
},
},
2..4,
),

View file

@ -14,8 +14,11 @@ expression: lex_source(source)
(
String {
value: "a",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
6..9,
),
@ -26,8 +29,11 @@ expression: lex_source(source)
(
String {
value: "b",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
14..17,
),
@ -42,16 +48,22 @@ expression: lex_source(source)
(
String {
value: "c",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
23..26,
),
(
String {
value: "d",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
33..36,
),

View file

@ -6,72 +6,99 @@ expression: lex_source(source)
(
String {
value: "double",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
0..8,
),
(
String {
value: "single",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
9..17,
),
(
String {
value: "can\\'t",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
18..26,
),
(
String {
value: "\\\\\\\"",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
27..33,
),
(
String {
value: "\\t\\r\\n",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
34..42,
),
(
String {
value: "\\g",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
43..47,
),
(
String {
value: "raw\\'",
kind: RawString,
triple_quoted: false,
kind: StringKind {
prefix: "r",
triple_quoted: false,
quote_style: Single,
},
},
48..56,
),
(
String {
value: "\\420",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
57..63,
),
(
String {
value: "\\200\\0a",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Single,
},
},
64..73,
),

View file

@ -6,8 +6,11 @@ expression: string_continuation_with_eol(MAC_EOL)
(
String {
value: "abc\\\rdef",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
0..10,
),

View file

@ -6,8 +6,11 @@ expression: string_continuation_with_eol(UNIX_EOL)
(
String {
value: "abc\\\ndef",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
0..10,
),

View file

@ -6,8 +6,11 @@ expression: string_continuation_with_eol(WINDOWS_EOL)
(
String {
value: "abc\\\r\ndef",
kind: String,
triple_quoted: false,
kind: StringKind {
prefix: "",
triple_quoted: false,
quote_style: Double,
},
},
0..11,
),

View file

@ -6,8 +6,11 @@ expression: triple_quoted_eol(MAC_EOL)
(
String {
value: "\r test string\r ",
kind: String,
triple_quoted: true,
kind: StringKind {
prefix: "",
triple_quoted: true,
quote_style: Double,
},
},
0..21,
),

View file

@ -6,8 +6,11 @@ expression: triple_quoted_eol(UNIX_EOL)
(
String {
value: "\n test string\n ",
kind: String,
triple_quoted: true,
kind: StringKind {
prefix: "",
triple_quoted: true,
quote_style: Double,
},
},
0..21,
),

View file

@ -6,8 +6,11 @@ expression: triple_quoted_eol(WINDOWS_EOL)
(
String {
value: "\r\n test string\r\n ",
kind: String,
triple_quoted: true,
kind: StringKind {
prefix: "",
triple_quoted: true,
quote_style: Double,
},
},
0..23,
),

View file

@ -6,7 +6,8 @@ use ruff_python_ast::{self as ast, Expr};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::lexer::{LexicalError, LexicalErrorType};
use crate::token::{StringKind, Tok};
use crate::string_token_flags::StringKind;
use crate::token::Tok;
pub(crate) enum StringType {
Str(ast::StringLiteral),
@ -177,9 +178,9 @@ impl StringParser {
'v' => '\x0b',
o @ '0'..='7' => self.parse_octet(o as u8),
'x' => self.parse_unicode_literal(2)?,
'u' if !self.kind.is_any_bytes() => self.parse_unicode_literal(4)?,
'U' if !self.kind.is_any_bytes() => self.parse_unicode_literal(8)?,
'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
'u' if !self.kind.is_byte_string() => self.parse_unicode_literal(4)?,
'U' if !self.kind.is_byte_string() => self.parse_unicode_literal(8)?,
'N' if !self.kind.is_byte_string() => self.parse_unicode_name()?,
// Special cases where the escape sequence is not a single character
'\n' => return Ok(None),
'\r' => {
@ -190,7 +191,7 @@ impl StringParser {
return Ok(None);
}
_ => {
if self.kind.is_any_bytes() && !first_char.is_ascii() {
if self.kind.is_byte_string() && !first_char.is_ascii() {
return Err(LexicalError::new(
LexicalErrorType::OtherError(
"bytes can only contain ASCII literal characters"
@ -257,7 +258,7 @@ impl StringParser {
// This is still an invalid escape sequence, but we don't want to
// raise a syntax error as is done by the CPython parser. It might
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
b'\\' if !self.kind.is_raw() && self.peek_byte().is_some() => {
b'\\' if !self.kind.is_raw_string() && self.peek_byte().is_some() => {
match self.parse_escaped_char()? {
None => {}
Some(EscapedChar::Literal(c)) => value.push(c),
@ -302,7 +303,7 @@ impl StringParser {
));
}
if self.kind.is_raw() {
if self.kind.is_raw_string() {
// For raw strings, no escaping is necessary.
return Ok(StringType::Bytes(ast::BytesLiteral {
value: self.source.into_boxed_bytes(),
@ -355,11 +356,11 @@ impl StringParser {
}
fn parse_string(mut self) -> Result<StringType, LexicalError> {
if self.kind.is_raw() {
if self.kind.is_raw_string() {
// For raw strings, no escaping is necessary.
return Ok(StringType::Str(ast::StringLiteral {
value: self.source,
unicode: self.kind.is_unicode(),
unicode: self.kind.is_u_string(),
range: self.range,
}));
}
@ -368,7 +369,7 @@ impl StringParser {
// If the string doesn't contain any escape sequences, return the owned string.
return Ok(StringType::Str(ast::StringLiteral {
value: self.source,
unicode: self.kind.is_unicode(),
unicode: self.kind.is_u_string(),
range: self.range,
}));
};
@ -405,13 +406,13 @@ impl StringParser {
Ok(StringType::Str(ast::StringLiteral {
value: value.into_boxed_str(),
unicode: self.kind.is_unicode(),
unicode: self.kind.is_u_string(),
range: self.range,
}))
}
fn parse(self) -> Result<StringType, LexicalError> {
if self.kind.is_any_bytes() {
if self.kind.is_byte_string() {
self.parse_bytes()
} else {
self.parse_string()
@ -422,29 +423,16 @@ impl StringParser {
pub(crate) fn parse_string_literal(
source: Box<str>,
kind: StringKind,
triple_quoted: bool,
range: TextRange,
) -> Result<StringType, LexicalError> {
let start_location = range.start()
+ kind.prefix_len()
+ if triple_quoted {
TextSize::from(3)
} else {
TextSize::from(1)
};
StringParser::new(source, kind, start_location, range).parse()
StringParser::new(source, kind, range.start() + kind.opener_len(), range).parse()
}
pub(crate) fn parse_fstring_literal_element(
source: Box<str>,
is_raw: bool,
kind: StringKind,
range: TextRange,
) -> Result<ast::FStringElement, LexicalError> {
let kind = if is_raw {
StringKind::RawString
} else {
StringKind::String
};
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()
}

View file

@ -0,0 +1,314 @@
use std::fmt;
use bitflags::bitflags;
use ruff_text_size::{TextLen, TextSize};
bitflags! {
/// Flags that can be queried to obtain information
/// regarding the prefixes and quotes used for a string literal.
///
/// The two quote flags (`DOUBLE`, `TRIPLE_QUOTED`) are independent of the
/// four `*_PREFIX` flags. An empty set of flags therefore describes an
/// unprefixed, single-quoted string that is not triple-quoted.
///
/// Note that not all of these flags can be validly combined -- e.g.,
/// it is invalid to combine the `U_PREFIX` flag with any other
/// of the `*_PREFIX` flags. As such, the recommended way to set the
/// prefix flags is by calling the `as_flags()` method on the
/// `StringPrefix` enum.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct StringFlags: u8 {
/// The string uses double quotes (`"`).
/// If this flag is not set, the string uses single quotes (`'`).
const DOUBLE = 1 << 0;
/// The string is triple-quoted:
/// it begins and ends with three consecutive quote characters.
const TRIPLE_QUOTED = 1 << 1;
/// The string has a `u` or `U` prefix.
/// While this prefix is a no-op at runtime,
/// strings with this prefix can have no other prefixes set.
const U_PREFIX = 1 << 2;
/// The string has a `b` or `B` prefix.
/// This means that the string is a sequence of `int`s at runtime,
/// rather than a sequence of `str`s.
/// Strings with this flag can also be raw strings,
/// but can have no other prefixes.
const B_PREFIX = 1 << 3;
/// The string has an `f` or `F` prefix, meaning it is an f-string.
/// F-strings can also be raw strings,
/// but can have no other prefixes.
const F_PREFIX = 1 << 4;
/// The string has an `r` or `R` prefix, meaning it is a raw string.
/// F-strings and byte-strings can be raw,
/// as can strings with no other prefixes.
/// U-strings cannot be raw.
const R_PREFIX = 1 << 5;
}
}
/// Enumeration of all the possible valid prefixes
/// prior to a Python string literal.
///
/// Using the `as_flags()` method on variants of this enum
/// is the recommended way to set `*_PREFIX` flags from the
/// `StringFlags` bitflag, as it means that you cannot accidentally
/// set a combination of `*_PREFIX` flags that would be invalid
/// at runtime in Python.
///
/// The prefixes themselves are described in the
/// [String and Bytes literals] section of the Python reference;
/// the tokenization of f-strings is described in [PEP 701].
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub(crate) enum StringPrefix {
/// The string has a `u` or `U` prefix.
/// While this prefix is a no-op at runtime,
/// strings with this prefix can have no other prefixes set.
Unicode,
/// The string has an `r` or `R` prefix, meaning it is a raw string.
/// F-strings and byte-strings can be raw,
/// as can strings with no other prefixes.
/// U-strings cannot be raw.
Raw,
/// The string has an `f` or `F` prefix, meaning it is an f-string.
/// F-strings can also be raw strings,
/// but can have no other prefixes.
Format,
/// The string has a `b` or `B` prefix.
/// This means that the string is a sequence of `int`s at runtime,
/// rather than a sequence of `str`s.
/// Bytestrings can also be raw strings,
/// but can have no other prefixes.
Bytes,
/// A string that has any one of the prefixes
/// `{"rf", "rF", "Rf", "RF", "fr", "fR", "Fr", "FR"}`.
/// Semantically, these all have the same meaning:
/// the string is both an f-string and a raw string.
RawFormat,
/// A string that has any one of the prefixes
/// `{"rb", "rB", "Rb", "RB", "br", "bR", "Br", "BR"}`.
/// Semantically, these all have the same meaning:
/// the string is both a bytestring and a raw string.
RawBytes,
}
/// Recognizes a single-character string prefix, ignoring ASCII case.
///
/// Any character other than `r`, `u`, `b` or `f` (in either case) yields
/// an error message that quotes the offending character.
impl TryFrom<char> for StringPrefix {
    type Error = String;

    fn try_from(value: char) -> Result<Self, String> {
        // Only ASCII letters are folded, so non-ASCII input can never be
        // mistaken for a valid prefix. The error reports `value` as given.
        match value.to_ascii_lowercase() {
            'b' => Ok(Self::Bytes),
            'f' => Ok(Self::Format),
            'r' => Ok(Self::Raw),
            'u' => Ok(Self::Unicode),
            _ => Err(format!("Unexpected prefix '{value}'")),
        }
    }
}
/// Recognizes a two-character string prefix, ignoring ASCII case and the
/// order of the two characters.
///
/// Only the raw/format and raw/bytes pairings are accepted; any other pair
/// yields an error message that quotes both characters as given.
impl TryFrom<[char; 2]> for StringPrefix {
    type Error = String;

    fn try_from(value: [char; 2]) -> Result<Self, String> {
        let [first, second] = value;
        match (first.to_ascii_lowercase(), second.to_ascii_lowercase()) {
            ('r', 'b') | ('b', 'r') => Ok(Self::RawBytes),
            ('r', 'f') | ('f', 'r') => Ok(Self::RawFormat),
            _ => Err(format!("Unexpected prefix '{}{}'", first, second)),
        }
    }
}
impl StringPrefix {
    /// Converts this prefix into the matching set of `*_PREFIX` flags.
    ///
    /// The single-prefix variants map to exactly one flag. The combined
    /// variants (`RawFormat`, `RawBytes`) set `R_PREFIX` together with
    /// `F_PREFIX` or `B_PREFIX` respectively.
    const fn as_flags(self) -> StringFlags {
        let raw = StringFlags::R_PREFIX;
        match self {
            Self::Unicode => StringFlags::U_PREFIX,
            Self::Raw => raw,
            Self::Format => StringFlags::F_PREFIX,
            Self::Bytes => StringFlags::B_PREFIX,
            Self::RawFormat => raw.union(StringFlags::F_PREFIX),
            Self::RawBytes => raw.union(StringFlags::B_PREFIX),
        }
    }
}
/// Describes how a string literal was written in the source: which
/// prefixes it carries, which quote character it uses, and whether it
/// is triple-quoted.
///
/// A value with no flags set describes an unprefixed, single-quoted
/// string that is not triple-quoted.
#[derive(Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StringKind(StringFlags);

impl StringKind {
    /// Builds a kind carrying only the flags implied by `prefix`
    /// (no flags at all when `prefix` is `None`).
    ///
    /// No quote flags are set here; they are added with
    /// `with_double_quotes()` and `with_triple_quotes()`.
    pub(crate) const fn from_prefix(prefix: Option<StringPrefix>) -> Self {
        match prefix {
            Some(prefix) => Self(prefix.as_flags()),
            None => Self(StringFlags::empty()),
        }
    }

    /// Is `flag` set on this string kind?
    const fn has_flag(self, flag: StringFlags) -> bool {
        self.0.contains(flag)
    }

    /// Does the string have a `u` or `U` prefix?
    pub const fn is_u_string(self) -> bool {
        self.has_flag(StringFlags::U_PREFIX)
    }

    /// Does the string have an `r` or `R` prefix?
    pub const fn is_raw_string(self) -> bool {
        self.has_flag(StringFlags::R_PREFIX)
    }

    /// Does the string have an `f` or `F` prefix?
    pub const fn is_f_string(self) -> bool {
        self.has_flag(StringFlags::F_PREFIX)
    }

    /// Does the string have a `b` or `B` prefix?
    pub const fn is_byte_string(self) -> bool {
        self.has_flag(StringFlags::B_PREFIX)
    }

    /// Does the string use single or double quotes in its opener and closer?
    pub const fn quote_style(self) -> QuoteStyle {
        if self.has_flag(StringFlags::DOUBLE) {
            QuoteStyle::Double
        } else {
            QuoteStyle::Single
        }
    }

    /// Is the string triple-quoted, i.e.,
    /// does it begin and end with three consecutive quote characters?
    pub const fn is_triple_quoted(self) -> bool {
        self.has_flag(StringFlags::TRIPLE_QUOTED)
    }

    /// A `str` representation of the quotes used to start and close.
    /// This does not include any prefixes the string has in its opener.
    pub const fn quote_str(self) -> &'static str {
        match (self.quote_style(), self.is_triple_quoted()) {
            (QuoteStyle::Single, false) => "'",
            (QuoteStyle::Single, true) => "'''",
            (QuoteStyle::Double, false) => "\"",
            (QuoteStyle::Double, true) => r#"""""#,
        }
    }

    /// A `str` representation of the prefixes used (if any)
    /// in the string's opener.
    ///
    /// The result is always lowercase. When a raw prefix is combined
    /// with an `f` or `b` prefix, the `r` comes first (`"rf"`, `"rb"`),
    /// however the literal was spelled in the source.
    pub const fn prefix_str(self) -> &'static str {
        let is_raw = self.has_flag(StringFlags::R_PREFIX);
        if self.has_flag(StringFlags::F_PREFIX) {
            if is_raw {
                "rf"
            } else {
                "f"
            }
        } else if self.has_flag(StringFlags::B_PREFIX) {
            if is_raw {
                "rb"
            } else {
                "b"
            }
        } else if is_raw {
            "r"
        } else if self.has_flag(StringFlags::U_PREFIX) {
            "u"
        } else {
            ""
        }
    }

    /// The length of the prefixes used (if any) in the string's opener.
    pub fn prefix_len(self) -> TextSize {
        self.prefix_str().text_len()
    }

    /// The length of the quotes used to start and close the string.
    /// This does not include the length of any prefixes the string has
    /// in its opener.
    pub const fn quote_len(self) -> TextSize {
        TextSize::new(if self.is_triple_quoted() { 3 } else { 1 })
    }

    /// The total length of the string's opener,
    /// i.e., the length of the prefixes plus the length
    /// of the quotes used to open the string.
    pub fn opener_len(self) -> TextSize {
        let prefix = self.prefix_len();
        let quotes = self.quote_len();
        prefix + quotes
    }

    /// The total length of the string's closer.
    /// This is always equal to `self.quote_len()`,
    /// but is provided here for symmetry with the `opener_len()` method.
    pub const fn closer_len(self) -> TextSize {
        self.quote_len()
    }

    /// Wraps `contents` in this kind's prefix and quotes:
    /// prefix, opening quotes, `contents`, closing quotes.
    ///
    /// `contents` is inserted verbatim; no escaping is performed.
    pub fn format_string_contents(self, contents: &str) -> String {
        let quotes = self.quote_str();
        format!("{}{}{}{}", self.prefix_str(), quotes, contents, quotes)
    }

    /// Returns this kind with the double-quote flag set.
    #[must_use]
    pub fn with_double_quotes(mut self) -> Self {
        self.0.insert(StringFlags::DOUBLE);
        self
    }

    /// Returns this kind with the triple-quoted flag set.
    #[must_use]
    pub fn with_triple_quotes(mut self) -> Self {
        self.0.insert(StringFlags::TRIPLE_QUOTED);
        self
    }
}
/// Debug output shows the derived properties (`prefix`, `triple_quoted`,
/// `quote_style`) rather than the raw flag bits.
impl fmt::Debug for StringKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut repr = f.debug_struct("StringKind");
        repr.field("prefix", &self.prefix_str());
        repr.field("triple_quoted", &self.is_triple_quoted());
        repr.field("quote_style", &self.quote_style());
        repr.finish()
    }
}
/// The quote character used to delimit a string literal.
///
/// The default style is [`QuoteStyle::Double`].
#[derive(Debug, Default, Copy, Clone, Hash, PartialEq, Eq)]
pub enum QuoteStyle {
    /// E.g. '
    Single,
    /// E.g. "
    #[default]
    Double,
}

impl QuoteStyle {
    /// The quote character this style stands for: `'` or `"`.
    pub const fn as_char(self) -> char {
        match self {
            QuoteStyle::Double => '"',
            QuoteStyle::Single => '\'',
        }
    }

    /// The other quote style: single for double, and double for single.
    #[must_use]
    pub const fn opposite(self) -> Self {
        match self {
            QuoteStyle::Double => QuoteStyle::Single,
            QuoteStyle::Single => QuoteStyle::Double,
        }
    }
}

View file

@ -4,10 +4,10 @@
//! loosely based on the token definitions found in the [CPython source].
//!
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h;
use crate::string_token_flags::StringKind;
use crate::Mode;
use ruff_python_ast::{Int, IpyEscapeKind};
use ruff_text_size::TextSize;
use std::fmt;
/// The set of tokens the Python source code can be tokenized in.
@ -39,23 +39,21 @@ pub enum Tok {
String {
/// The string value.
value: Box<str>,
/// The kind of string.
/// Flags that can be queried to determine the quote style
/// and prefixes of the string
kind: StringKind,
/// Whether the string is triple quoted.
triple_quoted: bool,
},
/// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix
/// and the opening quote(s).
FStringStart,
FStringStart(StringKind),
/// Token value that includes the portion of text inside the f-string that's not
/// part of the expression part and isn't an opening or closing brace.
FStringMiddle {
/// The string value.
value: Box<str>,
/// Whether the string is raw or not.
is_raw: bool,
/// Whether the string is triple quoted.
triple_quoted: bool,
/// Flags that can be queried to determine the quote style
/// and prefixes of the string
kind: StringKind,
},
/// Token value for the end of an f-string. This includes the closing quote.
FStringEnd,
@ -243,15 +241,10 @@ impl fmt::Display for Tok {
Int { value } => write!(f, "'{value}'"),
Float { value } => write!(f, "'{value}'"),
Complex { real, imag } => write!(f, "{real}j{imag}"),
String {
value,
kind,
triple_quoted,
} => {
let quotes = "\"".repeat(if *triple_quoted { 3 } else { 1 });
write!(f, "{kind}{quotes}{value}{quotes}")
String { value, kind } => {
write!(f, "{}", kind.format_string_contents(value))
}
FStringStart => f.write_str("FStringStart"),
FStringStart(_) => f.write_str("FStringStart"),
FStringMiddle { value, .. } => f.write_str(value),
FStringEnd => f.write_str("FStringEnd"),
IpyEscapeCommand { kind, value } => write!(f, "{kind}{value}"),
@ -354,103 +347,6 @@ impl fmt::Display for Tok {
}
}
/// The kind of string literal as described in the [String and Bytes literals]
/// section of the Python reference.
///
/// Note that f-strings are not included here, because as of [PEP 701] they
/// emit different tokens than other string literals.
///
/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
/// [PEP 701]: https://peps.python.org/pep-0701/
#[derive(PartialEq, Eq, Debug, Clone, Hash, Copy)] // TODO: is_macro::Is
pub enum StringKind {
/// A normal string literal with no prefix.
String,
/// A byte string literal, with a `b` or `B` prefix.
Bytes,
/// A raw string literal, with an `r` or `R` prefix.
RawString,
/// A raw byte string literal, with an `rb` or `br` prefix
/// in any combination of upper and lower case (e.g. `rb`, `Br`, `RB`).
RawBytes,
/// A unicode string literal, with a `u` or `U` prefix.
Unicode,
}
/// Recognizes a single-character string prefix (`r`, `u` or `b`, in either
/// case). Any other character yields an error message naming it.
impl TryFrom<char> for StringKind {
    type Error = String;

    fn try_from(ch: char) -> Result<Self, String> {
        let kind = match ch {
            'b' | 'B' => StringKind::Bytes,
            'r' | 'R' => StringKind::RawString,
            'u' | 'U' => StringKind::Unicode,
            c => return Err(format!("Unexpected string prefix: {c}")),
        };
        Ok(kind)
    }
}
/// Recognizes a two-character raw-bytes prefix: `r` and `b` in either
/// order and either case. Any other pair yields an error message naming
/// both characters.
impl TryFrom<[char; 2]> for StringKind {
    type Error = String;

    fn try_from(chars: [char; 2]) -> Result<Self, String> {
        match chars {
            ['b' | 'B', 'r' | 'R'] | ['r' | 'R', 'b' | 'B'] => Ok(StringKind::RawBytes),
            [c1, c2] => Err(format!("Unexpected string prefix: {c1}{c2}")),
        }
    }
}
/// Displays the kind as its prefix text, i.e. the string returned by
/// [`StringKind::as_str`] (empty for a string with no prefix).
impl fmt::Display for StringKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Written directly, so formatter width/padding options are not applied.
f.write_str(self.as_str())
}
}
impl StringKind {
/// Returns true if the string is a raw string, i,e one of
/// [`StringKind::RawString`] or [`StringKind::RawBytes`].
pub fn is_raw(&self) -> bool {
use StringKind::{RawBytes, RawString};
matches!(self, RawString | RawBytes)
}
/// Returns true if the string is a byte string, i,e one of
/// [`StringKind::Bytes`] or [`StringKind::RawBytes`].
pub fn is_any_bytes(&self) -> bool {
use StringKind::{Bytes, RawBytes};
matches!(self, Bytes | RawBytes)
}
/// Returns true if the string is a unicode string, i,e [`StringKind::Unicode`].
pub fn is_unicode(&self) -> bool {
matches!(self, StringKind::Unicode)
}
/// Returns the number of characters in the prefix.
pub fn prefix_len(&self) -> TextSize {
use StringKind::{Bytes, RawBytes, RawString, String, Unicode};
let len = match self {
String => 0,
RawString | Unicode | Bytes => 1,
RawBytes => 2,
};
len.into()
}
pub fn as_str(&self) -> &'static str {
use StringKind::{Bytes, RawBytes, RawString, String, Unicode};
match self {
String => "",
Bytes => "b",
RawString => "r",
RawBytes => "rb",
Unicode => "u",
}
}
}
// TODO move to ruff_python_parser?
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TokenKind {
/// Token value for a name, commonly known as an identifier.
@ -804,7 +700,7 @@ impl TokenKind {
Tok::Float { .. } => TokenKind::Float,
Tok::Complex { .. } => TokenKind::Complex,
Tok::String { .. } => TokenKind::String,
Tok::FStringStart => TokenKind::FStringStart,
Tok::FStringStart(_) => TokenKind::FStringStart,
Tok::FStringMiddle { .. } => TokenKind::FStringMiddle,
Tok::FStringEnd => TokenKind::FStringEnd,
Tok::IpyEscapeCommand { .. } => TokenKind::EscapeCommand,