Track casing of r-string prefixes in the tokenizer and AST (#10314)

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Alex Waygood 2024-03-18 17:18:04 +00:00 committed by GitHub
parent 31db1b6e16
commit 162d2eb723
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
105 changed files with 1068 additions and 503 deletions

View file

@ -34,7 +34,7 @@ use std::{char, cmp::Ordering, str::FromStr};
use unicode_ident::{is_xid_continue, is_xid_start};
use unicode_normalization::UnicodeNormalization;
use ruff_python_ast::{Int, IpyEscapeKind};
use ruff_python_ast::{FStringPrefix, Int, IpyEscapeKind};
use ruff_text_size::{TextLen, TextRange, TextSize};
use crate::lexer::cursor::{Cursor, EOF_CHAR};
@ -175,24 +175,29 @@ impl<'source> Lexer<'source> {
match (first, self.cursor.first()) {
('f' | 'F', quote @ ('\'' | '"')) => {
self.cursor.bump();
return Ok(self.lex_fstring_start(quote, false));
return Ok(self.lex_fstring_start(quote, FStringPrefix::Regular));
}
('r' | 'R', 'f' | 'F') | ('f' | 'F', 'r' | 'R') if is_quote(self.cursor.second()) => {
('r', 'f' | 'F') | ('f' | 'F', 'r') if is_quote(self.cursor.second()) => {
self.cursor.bump();
let quote = self.cursor.bump().unwrap();
return Ok(self.lex_fstring_start(quote, true));
return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: false }));
}
('R', 'f' | 'F') | ('f' | 'F', 'R') if is_quote(self.cursor.second()) => {
self.cursor.bump();
let quote = self.cursor.bump().unwrap();
return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true }));
}
(_, quote @ ('\'' | '"')) => {
if let Ok(prefix) = StringPrefix::try_from(first) {
self.cursor.bump();
return self.lex_string(Some(prefix), quote);
return self.lex_string(prefix, quote);
}
}
(_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => {
self.cursor.bump();
if let Ok(prefix) = StringPrefix::try_from([first, second]) {
let quote = self.cursor.bump().unwrap();
return self.lex_string(Some(prefix), quote);
return self.lex_string(prefix, quote);
}
}
_ => {}
@ -551,15 +556,11 @@ impl<'source> Lexer<'source> {
}
/// Lex an f-string start token.
fn lex_fstring_start(&mut self, quote: char, is_raw_string: bool) -> Tok {
fn lex_fstring_start(&mut self, quote: char, prefix: FStringPrefix) -> Tok {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
let mut kind = StringKind::from_prefix(Some(if is_raw_string {
StringPrefix::RawFormat
} else {
StringPrefix::Format
}));
let mut kind = StringKind::from_prefix(StringPrefix::Format(prefix));
if quote == '"' {
kind = kind.with_double_quotes();
@ -707,11 +708,7 @@ impl<'source> Lexer<'source> {
}
/// Lex a string literal.
fn lex_string(
&mut self,
prefix: Option<StringPrefix>,
quote: char,
) -> Result<Tok, LexicalError> {
fn lex_string(&mut self, prefix: StringPrefix, quote: char) -> Result<Tok, LexicalError> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), quote);
@ -1085,7 +1082,7 @@ impl<'source> Lexer<'source> {
c if is_ascii_identifier_start(c) => self.lex_identifier(c)?,
'0'..='9' => self.lex_number(c)?,
'#' => return Ok((self.lex_comment(), self.token_range())),
'\'' | '"' => self.lex_string(None, c)?,
'\'' | '"' => self.lex_string(StringPrefix::default(), c)?,
'=' => {
if self.cursor.eat_char('=') {
Tok::EqEqual