mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-15 08:00:46 +00:00
Track quoting style in the tokenizer (#10256)
This commit is contained in:
parent
72c9f7e4c9
commit
c504d7ab11
55 changed files with 4595 additions and 3800 deletions
|
@@ -6,7 +6,8 @@ use ruff_python_ast::{self as ast, Expr};
|
|||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::lexer::{LexicalError, LexicalErrorType};
|
||||
use crate::token::{StringKind, Tok};
|
||||
use crate::string_token_flags::StringKind;
|
||||
use crate::token::Tok;
|
||||
|
||||
pub(crate) enum StringType {
|
||||
Str(ast::StringLiteral),
|
||||
|
@@ -177,9 +178,9 @@ impl StringParser {
|
|||
'v' => '\x0b',
|
||||
o @ '0'..='7' => self.parse_octet(o as u8),
|
||||
'x' => self.parse_unicode_literal(2)?,
|
||||
'u' if !self.kind.is_any_bytes() => self.parse_unicode_literal(4)?,
|
||||
'U' if !self.kind.is_any_bytes() => self.parse_unicode_literal(8)?,
|
||||
'N' if !self.kind.is_any_bytes() => self.parse_unicode_name()?,
|
||||
'u' if !self.kind.is_byte_string() => self.parse_unicode_literal(4)?,
|
||||
'U' if !self.kind.is_byte_string() => self.parse_unicode_literal(8)?,
|
||||
'N' if !self.kind.is_byte_string() => self.parse_unicode_name()?,
|
||||
// Special cases where the escape sequence is not a single character
|
||||
'\n' => return Ok(None),
|
||||
'\r' => {
|
||||
|
@@ -190,7 +191,7 @@ impl StringParser {
|
|||
return Ok(None);
|
||||
}
|
||||
_ => {
|
||||
if self.kind.is_any_bytes() && !first_char.is_ascii() {
|
||||
if self.kind.is_byte_string() && !first_char.is_ascii() {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters"
|
||||
|
@@ -257,7 +258,7 @@ impl StringParser {
|
|||
// This is still an invalid escape sequence, but we don't want to
|
||||
// raise a syntax error as is done by the CPython parser. It might
|
||||
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
|
||||
b'\\' if !self.kind.is_raw() && self.peek_byte().is_some() => {
|
||||
b'\\' if !self.kind.is_raw_string() && self.peek_byte().is_some() => {
|
||||
match self.parse_escaped_char()? {
|
||||
None => {}
|
||||
Some(EscapedChar::Literal(c)) => value.push(c),
|
||||
|
@@ -302,7 +303,7 @@ impl StringParser {
|
|||
));
|
||||
}
|
||||
|
||||
if self.kind.is_raw() {
|
||||
if self.kind.is_raw_string() {
|
||||
// For raw strings, no escaping is necessary.
|
||||
return Ok(StringType::Bytes(ast::BytesLiteral {
|
||||
value: self.source.into_boxed_bytes(),
|
||||
|
@@ -355,11 +356,11 @@ impl StringParser {
|
|||
}
|
||||
|
||||
fn parse_string(mut self) -> Result<StringType, LexicalError> {
|
||||
if self.kind.is_raw() {
|
||||
if self.kind.is_raw_string() {
|
||||
// For raw strings, no escaping is necessary.
|
||||
return Ok(StringType::Str(ast::StringLiteral {
|
||||
value: self.source,
|
||||
unicode: self.kind.is_unicode(),
|
||||
unicode: self.kind.is_u_string(),
|
||||
range: self.range,
|
||||
}));
|
||||
}
|
||||
|
@@ -368,7 +369,7 @@ impl StringParser {
|
|||
// If the string doesn't contain any escape sequences, return the owned string.
|
||||
return Ok(StringType::Str(ast::StringLiteral {
|
||||
value: self.source,
|
||||
unicode: self.kind.is_unicode(),
|
||||
unicode: self.kind.is_u_string(),
|
||||
range: self.range,
|
||||
}));
|
||||
};
|
||||
|
@@ -405,13 +406,13 @@ impl StringParser {
|
|||
|
||||
Ok(StringType::Str(ast::StringLiteral {
|
||||
value: value.into_boxed_str(),
|
||||
unicode: self.kind.is_unicode(),
|
||||
unicode: self.kind.is_u_string(),
|
||||
range: self.range,
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse(self) -> Result<StringType, LexicalError> {
|
||||
if self.kind.is_any_bytes() {
|
||||
if self.kind.is_byte_string() {
|
||||
self.parse_bytes()
|
||||
} else {
|
||||
self.parse_string()
|
||||
|
@@ -422,29 +423,16 @@ impl StringParser {
|
|||
pub(crate) fn parse_string_literal(
|
||||
source: Box<str>,
|
||||
kind: StringKind,
|
||||
triple_quoted: bool,
|
||||
range: TextRange,
|
||||
) -> Result<StringType, LexicalError> {
|
||||
let start_location = range.start()
|
||||
+ kind.prefix_len()
|
||||
+ if triple_quoted {
|
||||
TextSize::from(3)
|
||||
} else {
|
||||
TextSize::from(1)
|
||||
};
|
||||
StringParser::new(source, kind, start_location, range).parse()
|
||||
StringParser::new(source, kind, range.start() + kind.opener_len(), range).parse()
|
||||
}
|
||||
|
||||
pub(crate) fn parse_fstring_literal_element(
|
||||
source: Box<str>,
|
||||
is_raw: bool,
|
||||
kind: StringKind,
|
||||
range: TextRange,
|
||||
) -> Result<ast::FStringElement, LexicalError> {
|
||||
let kind = if is_raw {
|
||||
StringKind::RawString
|
||||
} else {
|
||||
StringKind::String
|
||||
};
|
||||
StringParser::new(source, kind, range.start(), range).parse_fstring_middle()
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue