ruff/crates/ruff_python_parser/src/string.rs
Dhruv Manilawala 6ecb4776de
Rename AnyStringKind -> AnyStringFlags (#11405)
## Summary

This PR renames `AnyStringKind` to `AnyStringFlags` and `AnyStringFlags`
to `AnyStringFlagsInner`.

The main motivation is to have consistent usage of "kind" and "flags".
For each string kind, it's "flags" like `StringLiteralFlags`,
`BytesLiteralFlags`, and `FStringFlags` but it was `AnyStringKind` for
the "any" variant.
2024-05-13 13:18:07 +00:00

872 lines
29 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Parsing of string literals, bytes literals, and implicit string concatenation.
use bstr::ByteSlice;
use ruff_python_ast::{self as ast, AnyStringFlags, Expr};
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::lexer::{LexicalError, LexicalErrorType};
#[derive(Debug)]
pub(crate) enum StringType {
Str(ast::StringLiteral),
Bytes(ast::BytesLiteral),
FString(ast::FString),
}
impl Ranged for StringType {
fn range(&self) -> TextRange {
match self {
Self::Str(node) => node.range(),
Self::Bytes(node) => node.range(),
Self::FString(node) => node.range(),
}
}
}
impl From<StringType> for Expr {
fn from(string: StringType) -> Self {
match string {
StringType::Str(node) => Expr::from(node),
StringType::Bytes(node) => Expr::from(node),
StringType::FString(node) => Expr::from(node),
}
}
}
enum EscapedChar {
Literal(char),
Escape(char),
}
struct StringParser {
/// The raw content of the string e.g., the `foo` part in `"foo"`.
source: Box<str>,
/// Current position of the parser in the source.
cursor: usize,
/// Flags that can be used to query information about the string.
flags: AnyStringFlags,
/// The location of the first character in the source from the start of the file.
offset: TextSize,
/// The range of the string literal.
range: TextRange,
}
impl StringParser {
fn new(source: Box<str>, flags: AnyStringFlags, offset: TextSize, range: TextRange) -> Self {
Self {
source,
cursor: 0,
flags,
offset,
range,
}
}
#[inline]
fn skip_bytes(&mut self, bytes: usize) -> &str {
let skipped_str = &self.source[self.cursor..self.cursor + bytes];
self.cursor += bytes;
skipped_str
}
/// Returns the current position of the parser considering the offset.
#[inline]
fn position(&self) -> TextSize {
self.compute_position(self.cursor)
}
/// Computes the position of the cursor considering the offset.
#[inline]
fn compute_position(&self, cursor: usize) -> TextSize {
self.offset + TextSize::try_from(cursor).unwrap()
}
/// Returns the next byte in the string, if there is one.
///
/// # Panics
///
/// When the next byte is a part of a multi-byte character.
#[inline]
fn next_byte(&mut self) -> Option<u8> {
self.source[self.cursor..].as_bytes().first().map(|&byte| {
self.cursor += 1;
byte
})
}
#[inline]
fn next_char(&mut self) -> Option<char> {
self.source[self.cursor..].chars().next().map(|c| {
self.cursor += c.len_utf8();
c
})
}
#[inline]
fn peek_byte(&self) -> Option<u8> {
self.source[self.cursor..].as_bytes().first().copied()
}
fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
let mut p: u32 = 0u32;
for i in 1..=literal_number {
let start = self.position();
match self.next_char() {
Some(c) => match c.to_digit(16) {
Some(d) => p += d << ((literal_number - i) * 4),
None => {
return Err(LexicalError::new(
LexicalErrorType::UnicodeError,
TextRange::at(start, TextSize::try_from(c.len_utf8()).unwrap()),
));
}
},
None => {
return Err(LexicalError::new(
LexicalErrorType::UnicodeError,
TextRange::empty(self.position()),
))
}
}
}
match p {
0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
_ => std::char::from_u32(p).ok_or(LexicalError::new(
LexicalErrorType::UnicodeError,
TextRange::empty(self.position()),
)),
}
}
fn parse_octet(&mut self, o: u8) -> char {
let mut radix_bytes = [o, 0, 0];
let mut len = 1;
while len < 3 {
let Some(b'0'..=b'7') = self.peek_byte() else {
break;
};
radix_bytes[len] = self.next_byte().unwrap();
len += 1;
}
// OK because radix_bytes is always going to be in the ASCII range.
let radix_str = std::str::from_utf8(&radix_bytes[..len]).expect("ASCII bytes");
let value = u32::from_str_radix(radix_str, 8).unwrap();
char::from_u32(value).unwrap()
}
fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
let start_pos = self.position();
let Some('{') = self.next_char() else {
return Err(LexicalError::new(
LexicalErrorType::MissingUnicodeLbrace,
TextRange::empty(start_pos),
));
};
let start_pos = self.position();
let Some(close_idx) = self.source[self.cursor..].find('}') else {
return Err(LexicalError::new(
LexicalErrorType::MissingUnicodeRbrace,
TextRange::empty(self.compute_position(self.source.len())),
));
};
let name_and_ending = self.skip_bytes(close_idx + 1);
let name = &name_and_ending[..name_and_ending.len() - 1];
unicode_names2::character(name).ok_or_else(|| {
LexicalError::new(
LexicalErrorType::UnicodeError,
// The cursor is right after the `}` character, so we subtract 1 to get the correct
// range of the unicode name.
TextRange::new(
start_pos,
self.compute_position(self.cursor - '}'.len_utf8()),
),
)
})
}
/// Parse an escaped character, returning the new character.
fn parse_escaped_char(&mut self) -> Result<Option<EscapedChar>, LexicalError> {
let Some(first_char) = self.next_char() else {
// TODO: check when this error case happens
return Err(LexicalError::new(
LexicalErrorType::StringError,
TextRange::empty(self.position()),
));
};
let new_char = match first_char {
'\\' => '\\',
'\'' => '\'',
'\"' => '"',
'a' => '\x07',
'b' => '\x08',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'v' => '\x0b',
o @ '0'..='7' => self.parse_octet(o as u8),
'x' => self.parse_unicode_literal(2)?,
'u' if !self.flags.is_byte_string() => self.parse_unicode_literal(4)?,
'U' if !self.flags.is_byte_string() => self.parse_unicode_literal(8)?,
'N' if !self.flags.is_byte_string() => self.parse_unicode_name()?,
// Special cases where the escape sequence is not a single character
'\n' => return Ok(None),
'\r' => {
if self.peek_byte() == Some(b'\n') {
self.next_byte();
}
return Ok(None);
}
_ => return Ok(Some(EscapedChar::Escape(first_char))),
};
Ok(Some(EscapedChar::Literal(new_char)))
}
fn parse_fstring_middle(mut self) -> Result<ast::FStringLiteralElement, LexicalError> {
// Fast-path: if the f-string doesn't contain any escape sequences, return the literal.
let Some(mut index) = memchr::memchr3(b'{', b'}', b'\\', self.source.as_bytes()) else {
return Ok(ast::FStringLiteralElement {
value: self.source,
range: self.range,
});
};
let mut value = String::with_capacity(self.source.len());
loop {
// Add the characters before the escape sequence (or curly brace) to the string.
let before_with_slash_or_brace = self.skip_bytes(index + 1);
let before = &before_with_slash_or_brace[..before_with_slash_or_brace.len() - 1];
value.push_str(before);
// Add the escaped character to the string.
match &self.source.as_bytes()[self.cursor - 1] {
// If there are any curly braces inside a `FStringMiddle` token,
// then they were escaped (i.e. `{{` or `}}`). This means that
// we need increase the location by 2 instead of 1.
b'{' => {
self.offset += TextSize::from(1);
value.push('{');
}
b'}' => {
self.offset += TextSize::from(1);
value.push('}');
}
// We can encounter a `\` as the last character in a `FStringMiddle`
// token which is valid in this context. For example,
//
// ```python
// f"\{foo} \{bar:\}"
// # ^ ^^ ^
// ```
//
// Here, the `FStringMiddle` token content will be "\" and " \"
// which is invalid if we look at the content in isolation:
//
// ```python
// "\"
// ```
//
// However, the content is syntactically valid in the context of
// the f-string because it's a substring of the entire f-string.
// This is still an invalid escape sequence, but we don't want to
// raise a syntax error as is done by the CPython parser. It might
// be supported in the future, refer to point 3: https://peps.python.org/pep-0701/#rejected-ideas
b'\\' => {
if !self.flags.is_raw_string() && self.peek_byte().is_some() {
match self.parse_escaped_char()? {
None => {}
Some(EscapedChar::Literal(c)) => value.push(c),
Some(EscapedChar::Escape(c)) => {
value.push('\\');
value.push(c);
}
}
} else {
value.push('\\');
}
}
ch => {
unreachable!("Expected '{{', '}}', or '\\' but got {:?}", ch);
}
}
let Some(next_index) =
memchr::memchr3(b'{', b'}', b'\\', self.source[self.cursor..].as_bytes())
else {
// Add the rest of the string to the value.
let rest = &self.source[self.cursor..];
value.push_str(rest);
break;
};
index = next_index;
}
Ok(ast::FStringLiteralElement {
value: value.into_boxed_str(),
range: self.range,
})
}
fn parse_bytes(mut self) -> Result<StringType, LexicalError> {
if let Some(index) = self.source.as_bytes().find_non_ascii_byte() {
let ch = self.source.chars().nth(index).unwrap();
return Err(LexicalError::new(
LexicalErrorType::InvalidByteLiteral,
TextRange::at(
self.compute_position(index),
TextSize::try_from(ch.len_utf8()).unwrap(),
),
));
}
if self.flags.is_raw_string() {
// For raw strings, no escaping is necessary.
return Ok(StringType::Bytes(ast::BytesLiteral {
value: self.source.into_boxed_bytes(),
range: self.range,
flags: self.flags.into(),
}));
}
let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
// If the string doesn't contain any escape sequences, return the owned string.
return Ok(StringType::Bytes(ast::BytesLiteral {
value: self.source.into_boxed_bytes(),
range: self.range,
flags: self.flags.into(),
}));
};
// If the string contains escape sequences, we need to parse them.
let mut value = Vec::with_capacity(self.source.len());
loop {
// Add the characters before the escape sequence to the string.
let before_with_slash = self.skip_bytes(escape + 1);
let before = &before_with_slash[..before_with_slash.len() - 1];
value.extend_from_slice(before.as_bytes());
// Add the escaped character to the string.
match self.parse_escaped_char()? {
None => {}
Some(EscapedChar::Literal(c)) => value.push(c as u8),
Some(EscapedChar::Escape(c)) => {
value.push(b'\\');
value.push(c as u8);
}
}
let Some(next_escape) = memchr::memchr(b'\\', self.source[self.cursor..].as_bytes())
else {
// Add the rest of the string to the value.
let rest = &self.source[self.cursor..];
value.extend_from_slice(rest.as_bytes());
break;
};
// Update the position of the next escape sequence.
escape = next_escape;
}
Ok(StringType::Bytes(ast::BytesLiteral {
value: value.into_boxed_slice(),
range: self.range,
flags: self.flags.into(),
}))
}
fn parse_string(mut self) -> Result<StringType, LexicalError> {
if self.flags.is_raw_string() {
// For raw strings, no escaping is necessary.
return Ok(StringType::Str(ast::StringLiteral {
value: self.source,
range: self.range,
flags: self.flags.into(),
}));
}
let Some(mut escape) = memchr::memchr(b'\\', self.source.as_bytes()) else {
// If the string doesn't contain any escape sequences, return the owned string.
return Ok(StringType::Str(ast::StringLiteral {
value: self.source,
range: self.range,
flags: self.flags.into(),
}));
};
// If the string contains escape sequences, we need to parse them.
let mut value = String::with_capacity(self.source.len());
loop {
// Add the characters before the escape sequence to the string.
let before_with_slash = self.skip_bytes(escape + 1);
let before = &before_with_slash[..before_with_slash.len() - 1];
value.push_str(before);
// Add the escaped character to the string.
match self.parse_escaped_char()? {
None => {}
Some(EscapedChar::Literal(c)) => value.push(c),
Some(EscapedChar::Escape(c)) => {
value.push('\\');
value.push(c);
}
}
let Some(next_escape) = self.source[self.cursor..].find('\\') else {
// Add the rest of the string to the value.
let rest = &self.source[self.cursor..];
value.push_str(rest);
break;
};
// Update the position of the next escape sequence.
escape = next_escape;
}
Ok(StringType::Str(ast::StringLiteral {
value: value.into_boxed_str(),
range: self.range,
flags: self.flags.into(),
}))
}
fn parse(self) -> Result<StringType, LexicalError> {
if self.flags.is_byte_string() {
self.parse_bytes()
} else {
self.parse_string()
}
}
}
pub(crate) fn parse_string_literal(
source: Box<str>,
flags: AnyStringFlags,
range: TextRange,
) -> Result<StringType, LexicalError> {
StringParser::new(source, flags, range.start() + flags.opener_len(), range).parse()
}
// TODO(dhruvmanila): Move this to the new parser
pub(crate) fn parse_fstring_literal_element(
source: Box<str>,
flags: AnyStringFlags,
range: TextRange,
) -> Result<ast::FStringLiteralElement, LexicalError> {
StringParser::new(source, flags, range.start(), range).parse_fstring_middle()
}
#[cfg(test)]
mod tests {
use crate::lexer::LexicalErrorType;
use crate::{parse_suite, FStringErrorType, ParseErrorType, Suite};
const WINDOWS_EOL: &str = "\r\n";
const MAC_EOL: &str = "\r";
const UNIX_EOL: &str = "\n";
fn string_parser_escaped_eol(eol: &str) -> Suite {
let source = format!(r"'text \{eol}more text'");
parse_suite(&source).unwrap()
}
#[test]
fn test_string_parser_escaped_unix_eol() {
let parse_ast = string_parser_escaped_eol(UNIX_EOL);
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_string_parser_escaped_mac_eol() {
let parse_ast = string_parser_escaped_eol(MAC_EOL);
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_string_parser_escaped_windows_eol() {
let parse_ast = string_parser_escaped_eol(WINDOWS_EOL);
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring() {
let source = r#"f"{a}{ b }{{foo}}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_nested_spec() {
let source = r#"f"{foo:{spec}}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_not_nested_spec() {
let source = r#"f"{foo:spec}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_empty_fstring() {
insta::assert_debug_snapshot!(parse_suite(r#"f"""#,).unwrap());
}
#[test]
fn test_fstring_parse_self_documenting_base() {
let source = r#"f"{user=}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_parse_self_documenting_base_more() {
let source = r#"f"mix {user=} with text and {second=}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_parse_self_documenting_format() {
let source = r#"f"{user=:>10}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
fn parse_fstring_error(source: &str) -> FStringErrorType {
parse_suite(source)
.map_err(|e| match e.error {
ParseErrorType::Lexical(LexicalErrorType::FStringError(e)) => e,
ParseErrorType::FStringError(e) => e,
e => unreachable!("Expected FStringError: {:?}", e),
})
.expect_err("Expected error")
}
#[test]
fn test_parse_invalid_fstring() {
use FStringErrorType::{InvalidConversionFlag, LambdaWithoutParentheses};
assert_eq!(parse_fstring_error(r#"f"{5!x}""#), InvalidConversionFlag);
assert_eq!(
parse_fstring_error("f'{lambda x:{x}}'"),
LambdaWithoutParentheses
);
// NOTE: The parser produces the `LambdaWithoutParentheses` for this case, but
// since the parser only return the first error to maintain compatibility with
// the rest of the codebase, this test case fails. The `LambdaWithoutParentheses`
// error appears after the unexpected `FStringMiddle` token, which is between the
// `:` and the `{`.
// assert_eq!(parse_fstring_error("f'{lambda x: {x}}'"), LambdaWithoutParentheses);
assert!(parse_suite(r#"f"{class}""#,).is_err());
}
#[test]
fn test_parse_fstring_not_equals() {
let source = r#"f"{1 != 2}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_equals() {
let source = r#"f"{42 == 42}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_self_doc_prec_space() {
let source = r#"f"{x =}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_self_doc_trailing_space() {
let source = r#"f"{x= }""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_yield_expr() {
let source = r#"f"{yield}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_string_concat() {
let source = "'Hello ' 'world'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_u_string_concat_1() {
let source = "'Hello ' u'world'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_u_string_concat_2() {
let source = "u'Hello ' 'world'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_f_string_concat_1() {
let source = "'Hello ' f'world'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_f_string_concat_2() {
let source = "'Hello ' f'world'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_f_string_concat_3() {
let source = "'Hello ' f'world{\"!\"}'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_f_string_concat_4() {
let source = "'Hello ' f'world{\"!\"}' 'again!'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_u_f_string_concat_1() {
let source = "u'Hello ' f'world'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_u_f_string_concat_2() {
let source = "u'Hello ' f'world' '!'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_string_triple_quotes_with_kind() {
let source = "u'''Hello, world!'''";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_single_quoted_byte() {
// single quote
let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_double_quoted_byte() {
// double quote
let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_escape_char_in_byte_literal() {
// backslash does not escape
let source = r#"b"omkmok\Xaa""#; // spell-checker:ignore omkmok
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_raw_byte_literal_1() {
let source = r"rb'\x1z'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_raw_byte_literal_2() {
let source = r"rb'\\'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_escape_octet() {
let source = r"b'\43a\4\1234'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_escaped_newline() {
let source = r#"f"\n{x}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_constant_range() {
let source = r#"f"aaa{bbb}ccc{ddd}eee""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_unescaped_newline() {
let source = r#"f"""
{x}""""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_escaped_character() {
let source = r#"f"\\{x}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_raw_fstring() {
let source = r#"rf"{x}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_triple_quoted_raw_fstring() {
let source = r#"rf"""{x}""""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_fstring_line_continuation() {
let source = r#"rf"\
{x}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_nested_string_spec() {
let source = r#"f"{foo:{''}}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_fstring_nested_concatenation_string_spec() {
let source = r#"f"{foo:{'' ''}}""#;
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
/// <https://github.com/astral-sh/ruff/issues/8355>
#[test]
fn test_dont_panic_on_8_in_octal_escape() {
let source = r"bold = '\038[1m'";
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_invalid_unicode_literal() {
let source = r"'\x1ó34'";
let error = parse_suite(source).unwrap_err();
insta::assert_debug_snapshot!(error);
}
#[test]
fn test_missing_unicode_lbrace_error() {
let source = r"'\N '";
let error = parse_suite(source).unwrap_err();
insta::assert_debug_snapshot!(error);
}
#[test]
fn test_missing_unicode_rbrace_error() {
let source = r"'\N{SPACE'";
let error = parse_suite(source).unwrap_err();
insta::assert_debug_snapshot!(error);
}
#[test]
fn test_invalid_unicode_name_error() {
let source = r"'\N{INVALID}'";
let error = parse_suite(source).unwrap_err();
insta::assert_debug_snapshot!(error);
}
#[test]
fn test_invalid_byte_literal_error() {
let source = r"b'123a𝐁c'";
let error = parse_suite(source).unwrap_err();
insta::assert_debug_snapshot!(error);
}
macro_rules! test_aliases_parse {
($($name:ident: $alias:expr,)*) => {
$(
#[test]
fn $name() {
let source = format!(r#""\N{{{0}}}""#, $alias);
let parse_ast = parse_suite(&source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
)*
}
}
test_aliases_parse! {
test_backspace_alias: "BACKSPACE",
test_bell_alias: "BEL",
test_carriage_return_alias: "CARRIAGE RETURN",
test_delete_alias: "DELETE",
test_escape_alias: "ESCAPE",
test_form_feed_alias: "FORM FEED",
test_hts_alias: "HTS",
test_character_tabulation_with_justification_alias: "CHARACTER TABULATION WITH JUSTIFICATION",
}
}