mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-13 16:15:16 +00:00
Fix FormattedValue location
This commit is contained in:
parent
4e00ba2c50
commit
faec9372f9
22 changed files with 2195 additions and 355 deletions
|
@ -1339,18 +1339,11 @@ OneOrMore<T>: Vec<T> = {
|
|||
};
|
||||
|
||||
Constant: ast::Constant = {
|
||||
<b:bytes+> => ast::Constant::Bytes(b.into_iter().flatten().collect()),
|
||||
<value:int> => ast::Constant::Int(value),
|
||||
<value:float> => ast::Constant::Float(value),
|
||||
<s:complex> => ast::Constant::Complex { real: s.0, imag: s.1 },
|
||||
};
|
||||
|
||||
Bytes: Vec<u8> = {
|
||||
<s:bytes+> => {
|
||||
s.into_iter().flatten().collect::<Vec<u8>>()
|
||||
},
|
||||
};
|
||||
|
||||
Identifier: String = <s:name> => s;
|
||||
|
||||
// Hook external lexer:
|
||||
|
@ -1448,8 +1441,11 @@ extern {
|
|||
int => lexer::Tok::Int { value: <BigInt> },
|
||||
float => lexer::Tok::Float { value: <f64> },
|
||||
complex => lexer::Tok::Complex { real: <f64>, imag: <f64> },
|
||||
string => lexer::Tok::String { value: <String>, kind: <StringKind> },
|
||||
bytes => lexer::Tok::Bytes { value: <Vec<u8>> },
|
||||
string => lexer::Tok::String {
|
||||
value: <String>,
|
||||
kind: <StringKind>,
|
||||
triple_quoted: <bool>
|
||||
},
|
||||
name => lexer::Tok::Name { name: <String> },
|
||||
"\n" => lexer::Tok::Newline,
|
||||
";" => lexer::Tok::Semi,
|
||||
|
|
|
@ -90,6 +90,15 @@ pub enum FStringErrorType {
|
|||
UnterminatedString,
|
||||
}
|
||||
|
||||
impl FStringErrorType {
|
||||
pub fn to_lexical_error(self, location: Location) -> LexicalError {
|
||||
LexicalError {
|
||||
error: LexicalErrorType::FStringError(self),
|
||||
location,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FStringErrorType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
// We no longer need this file
|
||||
use self::FStringErrorType::*;
|
||||
use crate::{
|
||||
ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
|
||||
|
|
|
@ -2,8 +2,7 @@
|
|||
//!
|
||||
//! This means source code is translated into separate tokens.
|
||||
|
||||
use super::token::StringKind;
|
||||
pub use super::token::Tok;
|
||||
pub use super::token::{StringKind, Tok};
|
||||
use crate::ast::Location;
|
||||
use crate::error::{LexicalError, LexicalErrorType};
|
||||
use num_bigint::BigInt;
|
||||
|
@ -217,9 +216,6 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
|
||||
const MAX_UNICODE_NAME: usize = 88;
|
||||
|
||||
impl<T> Lexer<T>
|
||||
where
|
||||
T: Iterator<Item = char>,
|
||||
|
@ -274,8 +270,26 @@ where
|
|||
|
||||
// Check if we have a string:
|
||||
if matches!(self.window[0], Some('"' | '\'')) {
|
||||
let kind = if saw_r {
|
||||
if saw_b {
|
||||
StringKind::RawBytes
|
||||
} else if saw_f {
|
||||
StringKind::RawFString
|
||||
} else {
|
||||
StringKind::RawString
|
||||
}
|
||||
} else if saw_b {
|
||||
StringKind::Bytes
|
||||
} else if saw_u {
|
||||
StringKind::Unicode
|
||||
} else if saw_f {
|
||||
StringKind::FString
|
||||
} else {
|
||||
StringKind::String
|
||||
};
|
||||
|
||||
return self
|
||||
.lex_string(saw_b, saw_r, saw_u, saw_f)
|
||||
.lex_string(kind)
|
||||
.map(|(_, tok, end_pos)| (start_pos, tok, end_pos));
|
||||
}
|
||||
}
|
||||
|
@ -479,87 +493,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
fn unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
|
||||
let mut p: u32 = 0u32;
|
||||
let unicode_error = LexicalError {
|
||||
error: LexicalErrorType::UnicodeError,
|
||||
location: self.get_pos(),
|
||||
};
|
||||
for i in 1..=literal_number {
|
||||
match self.next_char() {
|
||||
Some(c) => match c.to_digit(16) {
|
||||
Some(d) => p += d << ((literal_number - i) * 4),
|
||||
None => return Err(unicode_error),
|
||||
},
|
||||
None => return Err(unicode_error),
|
||||
}
|
||||
}
|
||||
match p {
|
||||
0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
|
||||
_ => std::char::from_u32(p).ok_or(unicode_error),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_octet(&mut self, first: char) -> char {
|
||||
let mut octet_content = String::new();
|
||||
octet_content.push(first);
|
||||
while octet_content.len() < 3 {
|
||||
if let Some('0'..='7') = self.window[0] {
|
||||
octet_content.push(self.next_char().unwrap())
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let value = u32::from_str_radix(&octet_content, 8).unwrap();
|
||||
char::from_u32(value).unwrap()
|
||||
}
|
||||
|
||||
fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
|
||||
let start_pos = self.get_pos();
|
||||
match self.next_char() {
|
||||
Some('{') => {}
|
||||
_ => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: start_pos,
|
||||
})
|
||||
}
|
||||
}
|
||||
let start_pos = self.get_pos();
|
||||
let mut name = String::new();
|
||||
loop {
|
||||
match self.next_char() {
|
||||
Some('}') => break,
|
||||
Some(c) => name.push(c),
|
||||
None => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: self.get_pos(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if name.len() > MAX_UNICODE_NAME {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::UnicodeError,
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
|
||||
unicode_names2::character(&name).ok_or(LexicalError {
|
||||
error: LexicalErrorType::UnicodeError,
|
||||
location: start_pos,
|
||||
})
|
||||
}
|
||||
|
||||
fn lex_string(
|
||||
&mut self,
|
||||
is_bytes: bool,
|
||||
is_raw: bool,
|
||||
is_unicode: bool,
|
||||
is_fstring: bool,
|
||||
) -> LexResult {
|
||||
fn lex_string(&mut self, kind: StringKind) -> LexResult {
|
||||
let start_pos = self.get_pos();
|
||||
let quote_char = self.next_char().unwrap();
|
||||
let mut string_content = String::new();
|
||||
|
@ -577,62 +511,24 @@ where
|
|||
|
||||
loop {
|
||||
match self.next_char() {
|
||||
Some('\\') => {
|
||||
if self.window[0] == Some(quote_char) && !is_raw {
|
||||
string_content.push(quote_char);
|
||||
self.next_char();
|
||||
} else if is_raw {
|
||||
Some(c) => {
|
||||
if c == '\\' {
|
||||
if let Some(next_c) = self.next_char() {
|
||||
string_content.push('\\');
|
||||
if let Some(c) = self.next_char() {
|
||||
string_content.push(c)
|
||||
} else {
|
||||
string_content.push(next_c);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if c == '\n' && !triple_quoted {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
error: LexicalErrorType::OtherError(
|
||||
"EOL while scanning string literal".to_owned(),
|
||||
),
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
match self.next_char() {
|
||||
Some('\\') => {
|
||||
string_content.push('\\');
|
||||
}
|
||||
Some('\'') => string_content.push('\''),
|
||||
Some('\"') => string_content.push('\"'),
|
||||
Some('\n') => {
|
||||
// Ignore Unix EOL character
|
||||
}
|
||||
Some('a') => string_content.push('\x07'),
|
||||
Some('b') => string_content.push('\x08'),
|
||||
Some('f') => string_content.push('\x0c'),
|
||||
Some('n') => {
|
||||
string_content.push('\n');
|
||||
}
|
||||
Some('r') => string_content.push('\r'),
|
||||
Some('t') => {
|
||||
string_content.push('\t');
|
||||
}
|
||||
Some('v') => string_content.push('\x0b'),
|
||||
Some(o @ '0'..='7') => string_content.push(self.parse_octet(o)),
|
||||
Some('x') => string_content.push(self.unicode_literal(2)?),
|
||||
Some('u') if !is_bytes => string_content.push(self.unicode_literal(4)?),
|
||||
Some('U') if !is_bytes => string_content.push(self.unicode_literal(8)?),
|
||||
Some('N') if !is_bytes => {
|
||||
string_content.push(self.parse_unicode_name()?)
|
||||
}
|
||||
Some(c) => {
|
||||
string_content.push('\\');
|
||||
string_content.push(c);
|
||||
}
|
||||
None => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(c) => {
|
||||
|
||||
if c == quote_char {
|
||||
if triple_quoted {
|
||||
// Look ahead at the next two characters; if we have two more
|
||||
|
@ -645,20 +541,12 @@ where
|
|||
self.next_char();
|
||||
break;
|
||||
}
|
||||
string_content.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (c == '\n' && !triple_quoted) || (is_bytes && !c.is_ascii()) {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::Eof,
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
string_content.push(c);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(LexicalError {
|
||||
error: if triple_quoted {
|
||||
|
@ -672,25 +560,11 @@ where
|
|||
}
|
||||
}
|
||||
let end_pos = self.get_pos();
|
||||
|
||||
let tok = if is_bytes {
|
||||
Tok::Bytes {
|
||||
value: string_content.chars().map(|c| c as u8).collect(),
|
||||
}
|
||||
} else {
|
||||
let kind = if is_fstring {
|
||||
StringKind::F
|
||||
} else if is_unicode {
|
||||
StringKind::U
|
||||
} else {
|
||||
StringKind::Normal
|
||||
};
|
||||
Tok::String {
|
||||
let tok = Tok::String {
|
||||
value: string_content,
|
||||
kind,
|
||||
}
|
||||
triple_quoted,
|
||||
};
|
||||
|
||||
Ok((start_pos, tok, end_pos))
|
||||
}
|
||||
|
||||
|
@ -907,7 +781,7 @@ where
|
|||
self.emit(comment);
|
||||
}
|
||||
'"' | '\'' => {
|
||||
let string = self.lex_string(false, false, false, false)?;
|
||||
let string = self.lex_string(StringKind::String)?;
|
||||
self.emit(string);
|
||||
}
|
||||
'=' => {
|
||||
|
@ -1367,15 +1241,17 @@ mod tests {
|
|||
fn stok(s: &str) -> Tok {
|
||||
Tok::String {
|
||||
value: s.to_owned(),
|
||||
kind: StringKind::Normal,
|
||||
kind: StringKind::String,
|
||||
triple_quoted: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_string() {
|
||||
let source = "r\"\\\\\" \"\\\\\"";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(tokens, vec![stok("\\\\"), stok("\\"), Tok::Newline,]);
|
||||
fn raw_stok(s: &str) -> Tok {
|
||||
Tok::String {
|
||||
value: s.to_owned(),
|
||||
kind: StringKind::RawString,
|
||||
triple_quoted: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1677,13 +1553,13 @@ mod tests {
|
|||
vec![
|
||||
stok("double"),
|
||||
stok("single"),
|
||||
stok("can't"),
|
||||
stok("\\\""),
|
||||
stok("\t\r\n"),
|
||||
stok("\\g"),
|
||||
stok("raw\\'"),
|
||||
stok("Đ"),
|
||||
stok("\u{80}\u{0}a"),
|
||||
stok(r"can\'t"),
|
||||
stok(r#"\\\""#),
|
||||
stok(r"\t\r\n"),
|
||||
stok(r"\g"),
|
||||
raw_stok(r"raw\'"),
|
||||
stok(r"\420"),
|
||||
stok(r"\200\0a"),
|
||||
Tok::Newline,
|
||||
]
|
||||
);
|
||||
|
@ -1699,7 +1575,7 @@ mod tests {
|
|||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
stok("abcdef"),
|
||||
stok("abc\\\ndef"),
|
||||
Tok::Newline,
|
||||
]
|
||||
)
|
||||
|
@ -1714,78 +1590,10 @@ mod tests {
|
|||
test_string_continuation_unix_eol: UNIX_EOL,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_quoted_byte() {
|
||||
// single quote
|
||||
let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
|
||||
let tokens = lex_source(source);
|
||||
let res = (0..=255).collect::<Vec<u8>>();
|
||||
assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_double_quoted_byte() {
|
||||
// double quote
|
||||
let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
|
||||
let tokens = lex_source(source);
|
||||
let res = (0..=255).collect::<Vec<u8>>();
|
||||
assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_char_in_byte_literal() {
|
||||
// backslash does not escape
|
||||
let source = r##"b"omkmok\Xaa""##;
|
||||
let tokens = lex_source(source);
|
||||
let res = vec![111, 109, 107, 109, 111, 107, 92, 88, 97, 97];
|
||||
assert_eq!(tokens, vec![Tok::Bytes { value: res }, Tok::Newline]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal() {
|
||||
let source = r"rb'\x1z'";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Bytes {
|
||||
value: b"\\x1z".to_vec()
|
||||
},
|
||||
Tok::Newline
|
||||
]
|
||||
);
|
||||
let source = r"rb'\\'";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Bytes {
|
||||
value: b"\\\\".to_vec()
|
||||
},
|
||||
Tok::Newline
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_octet() {
|
||||
let source = r##"b'\43a\4\1234'"##;
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Bytes {
|
||||
value: b"#a\x04S4".to_vec()
|
||||
},
|
||||
Tok::Newline
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_unicode_name() {
|
||||
let source = r#""\N{EN SPACE}""#;
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(tokens, vec![stok("\u{2002}"), Tok::Newline])
|
||||
assert_eq!(tokens, vec![stok(r"\N{EN SPACE}"), Tok::Newline])
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,11 +23,11 @@ extern crate log;
|
|||
pub use rustpython_ast as ast;
|
||||
|
||||
pub mod error;
|
||||
mod fstring;
|
||||
mod function;
|
||||
pub mod lexer;
|
||||
pub mod mode;
|
||||
pub mod parser;
|
||||
mod string_parser;
|
||||
#[rustfmt::skip]
|
||||
mod python;
|
||||
mod context;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
use crate::lexer::{LexResult, Tok};
|
||||
pub use crate::mode::Mode;
|
||||
use crate::{ast, error::ParseError, lexer, python};
|
||||
use ast::Location;
|
||||
use itertools::Itertools;
|
||||
use std::iter;
|
||||
|
||||
|
@ -65,7 +66,15 @@ pub fn parse_program(source: &str, source_path: &str) -> Result<ast::Suite, Pars
|
|||
///
|
||||
/// ```
|
||||
pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseError> {
|
||||
parse(source, Mode::Expression, path).map(|top| match top {
|
||||
parse_expression_located(source, path, Location::new(1, 0))
|
||||
}
|
||||
|
||||
pub fn parse_expression_located(
|
||||
source: &str,
|
||||
path: &str,
|
||||
location: Location,
|
||||
) -> Result<ast::Expr, ParseError> {
|
||||
parse_located(source, Mode::Expression, path, location).map(|top| match top {
|
||||
ast::Mod::Expression { body } => *body,
|
||||
_ => unreachable!(),
|
||||
})
|
||||
|
@ -73,7 +82,17 @@ pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseErro
|
|||
|
||||
// Parse a given source code
|
||||
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
|
||||
let lxr = lexer::make_tokenizer(source);
|
||||
parse_located(source, mode, source_path, Location::new(1, 0))
|
||||
}
|
||||
|
||||
// Parse a given source code from a given location
|
||||
pub fn parse_located(
|
||||
source: &str,
|
||||
mode: Mode,
|
||||
source_path: &str,
|
||||
location: Location,
|
||||
) -> Result<ast::Mod, ParseError> {
|
||||
let lxr = lexer::make_tokenizer_located(source, location);
|
||||
let marker_token = (Default::default(), mode.to_marker(), Default::default());
|
||||
let tokenizer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
|
|
|
@ -0,0 +1,297 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 738,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 738,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Bytes(
|
||||
[
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63,
|
||||
64,
|
||||
65,
|
||||
66,
|
||||
67,
|
||||
68,
|
||||
69,
|
||||
70,
|
||||
71,
|
||||
72,
|
||||
73,
|
||||
74,
|
||||
75,
|
||||
76,
|
||||
77,
|
||||
78,
|
||||
79,
|
||||
80,
|
||||
81,
|
||||
82,
|
||||
83,
|
||||
84,
|
||||
85,
|
||||
86,
|
||||
87,
|
||||
88,
|
||||
89,
|
||||
90,
|
||||
91,
|
||||
92,
|
||||
93,
|
||||
94,
|
||||
95,
|
||||
96,
|
||||
97,
|
||||
98,
|
||||
99,
|
||||
100,
|
||||
101,
|
||||
102,
|
||||
103,
|
||||
104,
|
||||
105,
|
||||
106,
|
||||
107,
|
||||
108,
|
||||
109,
|
||||
110,
|
||||
111,
|
||||
112,
|
||||
113,
|
||||
114,
|
||||
115,
|
||||
116,
|
||||
117,
|
||||
118,
|
||||
119,
|
||||
120,
|
||||
121,
|
||||
122,
|
||||
123,
|
||||
124,
|
||||
125,
|
||||
126,
|
||||
127,
|
||||
128,
|
||||
129,
|
||||
130,
|
||||
131,
|
||||
132,
|
||||
133,
|
||||
134,
|
||||
135,
|
||||
136,
|
||||
137,
|
||||
138,
|
||||
139,
|
||||
140,
|
||||
141,
|
||||
142,
|
||||
143,
|
||||
144,
|
||||
145,
|
||||
146,
|
||||
147,
|
||||
148,
|
||||
149,
|
||||
150,
|
||||
151,
|
||||
152,
|
||||
153,
|
||||
154,
|
||||
155,
|
||||
156,
|
||||
157,
|
||||
158,
|
||||
159,
|
||||
160,
|
||||
161,
|
||||
162,
|
||||
163,
|
||||
164,
|
||||
165,
|
||||
166,
|
||||
167,
|
||||
168,
|
||||
169,
|
||||
170,
|
||||
171,
|
||||
172,
|
||||
173,
|
||||
174,
|
||||
175,
|
||||
176,
|
||||
177,
|
||||
178,
|
||||
179,
|
||||
180,
|
||||
181,
|
||||
182,
|
||||
183,
|
||||
184,
|
||||
185,
|
||||
186,
|
||||
187,
|
||||
188,
|
||||
189,
|
||||
190,
|
||||
191,
|
||||
192,
|
||||
193,
|
||||
194,
|
||||
195,
|
||||
196,
|
||||
197,
|
||||
198,
|
||||
199,
|
||||
200,
|
||||
201,
|
||||
202,
|
||||
203,
|
||||
204,
|
||||
205,
|
||||
206,
|
||||
207,
|
||||
208,
|
||||
209,
|
||||
210,
|
||||
211,
|
||||
212,
|
||||
213,
|
||||
214,
|
||||
215,
|
||||
216,
|
||||
217,
|
||||
218,
|
||||
219,
|
||||
220,
|
||||
221,
|
||||
222,
|
||||
223,
|
||||
224,
|
||||
225,
|
||||
226,
|
||||
227,
|
||||
228,
|
||||
229,
|
||||
230,
|
||||
231,
|
||||
232,
|
||||
233,
|
||||
234,
|
||||
235,
|
||||
236,
|
||||
237,
|
||||
238,
|
||||
239,
|
||||
240,
|
||||
241,
|
||||
242,
|
||||
243,
|
||||
244,
|
||||
245,
|
||||
246,
|
||||
247,
|
||||
248,
|
||||
249,
|
||||
250,
|
||||
251,
|
||||
252,
|
||||
253,
|
||||
254,
|
||||
255,
|
||||
],
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,51 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 13,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 13,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Bytes(
|
||||
[
|
||||
111,
|
||||
109,
|
||||
107,
|
||||
109,
|
||||
111,
|
||||
107,
|
||||
92,
|
||||
88,
|
||||
97,
|
||||
97,
|
||||
],
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,46 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 14,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 14,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Bytes(
|
||||
[
|
||||
35,
|
||||
97,
|
||||
4,
|
||||
83,
|
||||
52,
|
||||
],
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: JoinedStr {
|
||||
values: [
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Str(
|
||||
"\\",
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: FormattedValue {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 5,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Name {
|
||||
id: "x",
|
||||
ctx: Load,
|
||||
},
|
||||
},
|
||||
conversion: 0,
|
||||
format_spec: None,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: JoinedStr {
|
||||
values: [
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Str(
|
||||
"\n",
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: FormattedValue {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 5,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Name {
|
||||
id: "x",
|
||||
ctx: Load,
|
||||
},
|
||||
},
|
||||
conversion: 0,
|
||||
format_spec: None,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 4,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 4,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: JoinedStr {
|
||||
values: [
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 4,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Str(
|
||||
"\\\n",
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 4,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: FormattedValue {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 2,
|
||||
column: 1,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 2,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Name {
|
||||
id: "x",
|
||||
ctx: Load,
|
||||
},
|
||||
},
|
||||
conversion: 0,
|
||||
format_spec: None,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,91 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: JoinedStr {
|
||||
values: [
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Str(
|
||||
"\n",
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: FormattedValue {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 2,
|
||||
column: 1,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 2,
|
||||
column: 2,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Name {
|
||||
id: "x",
|
||||
ctx: Load,
|
||||
},
|
||||
},
|
||||
conversion: 0,
|
||||
format_spec: None,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -65,12 +65,12 @@ expression: parse_ast
|
|||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 1,
|
||||
column: 17,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 4,
|
||||
column: 20,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 8,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Bytes(
|
||||
[
|
||||
92,
|
||||
120,
|
||||
49,
|
||||
122,
|
||||
],
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,43 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 6,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Bytes(
|
||||
[
|
||||
92,
|
||||
92,
|
||||
],
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,72 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 7,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 7,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: JoinedStr {
|
||||
values: [
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 7,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: FormattedValue {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 4,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 5,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Name {
|
||||
id: "x",
|
||||
ctx: Load,
|
||||
},
|
||||
},
|
||||
conversion: 0,
|
||||
format_spec: None,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,297 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 738,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 738,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Constant {
|
||||
value: Bytes(
|
||||
[
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63,
|
||||
64,
|
||||
65,
|
||||
66,
|
||||
67,
|
||||
68,
|
||||
69,
|
||||
70,
|
||||
71,
|
||||
72,
|
||||
73,
|
||||
74,
|
||||
75,
|
||||
76,
|
||||
77,
|
||||
78,
|
||||
79,
|
||||
80,
|
||||
81,
|
||||
82,
|
||||
83,
|
||||
84,
|
||||
85,
|
||||
86,
|
||||
87,
|
||||
88,
|
||||
89,
|
||||
90,
|
||||
91,
|
||||
92,
|
||||
93,
|
||||
94,
|
||||
95,
|
||||
96,
|
||||
97,
|
||||
98,
|
||||
99,
|
||||
100,
|
||||
101,
|
||||
102,
|
||||
103,
|
||||
104,
|
||||
105,
|
||||
106,
|
||||
107,
|
||||
108,
|
||||
109,
|
||||
110,
|
||||
111,
|
||||
112,
|
||||
113,
|
||||
114,
|
||||
115,
|
||||
116,
|
||||
117,
|
||||
118,
|
||||
119,
|
||||
120,
|
||||
121,
|
||||
122,
|
||||
123,
|
||||
124,
|
||||
125,
|
||||
126,
|
||||
127,
|
||||
128,
|
||||
129,
|
||||
130,
|
||||
131,
|
||||
132,
|
||||
133,
|
||||
134,
|
||||
135,
|
||||
136,
|
||||
137,
|
||||
138,
|
||||
139,
|
||||
140,
|
||||
141,
|
||||
142,
|
||||
143,
|
||||
144,
|
||||
145,
|
||||
146,
|
||||
147,
|
||||
148,
|
||||
149,
|
||||
150,
|
||||
151,
|
||||
152,
|
||||
153,
|
||||
154,
|
||||
155,
|
||||
156,
|
||||
157,
|
||||
158,
|
||||
159,
|
||||
160,
|
||||
161,
|
||||
162,
|
||||
163,
|
||||
164,
|
||||
165,
|
||||
166,
|
||||
167,
|
||||
168,
|
||||
169,
|
||||
170,
|
||||
171,
|
||||
172,
|
||||
173,
|
||||
174,
|
||||
175,
|
||||
176,
|
||||
177,
|
||||
178,
|
||||
179,
|
||||
180,
|
||||
181,
|
||||
182,
|
||||
183,
|
||||
184,
|
||||
185,
|
||||
186,
|
||||
187,
|
||||
188,
|
||||
189,
|
||||
190,
|
||||
191,
|
||||
192,
|
||||
193,
|
||||
194,
|
||||
195,
|
||||
196,
|
||||
197,
|
||||
198,
|
||||
199,
|
||||
200,
|
||||
201,
|
||||
202,
|
||||
203,
|
||||
204,
|
||||
205,
|
||||
206,
|
||||
207,
|
||||
208,
|
||||
209,
|
||||
210,
|
||||
211,
|
||||
212,
|
||||
213,
|
||||
214,
|
||||
215,
|
||||
216,
|
||||
217,
|
||||
218,
|
||||
219,
|
||||
220,
|
||||
221,
|
||||
222,
|
||||
223,
|
||||
224,
|
||||
225,
|
||||
226,
|
||||
227,
|
||||
228,
|
||||
229,
|
||||
230,
|
||||
231,
|
||||
232,
|
||||
233,
|
||||
234,
|
||||
235,
|
||||
236,
|
||||
237,
|
||||
238,
|
||||
239,
|
||||
240,
|
||||
241,
|
||||
242,
|
||||
243,
|
||||
244,
|
||||
245,
|
||||
246,
|
||||
247,
|
||||
248,
|
||||
249,
|
||||
250,
|
||||
251,
|
||||
252,
|
||||
253,
|
||||
254,
|
||||
255,
|
||||
],
|
||||
),
|
||||
kind: None,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -0,0 +1,72 @@
|
|||
---
|
||||
source: compiler/parser/src/string.rs
|
||||
expression: parse_ast
|
||||
---
|
||||
[
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 11,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Expr {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 11,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: JoinedStr {
|
||||
values: [
|
||||
Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 0,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 11,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: FormattedValue {
|
||||
value: Located {
|
||||
location: Location {
|
||||
row: 1,
|
||||
column: 6,
|
||||
},
|
||||
end_location: Some(
|
||||
Location {
|
||||
row: 1,
|
||||
column: 7,
|
||||
},
|
||||
),
|
||||
custom: (),
|
||||
node: Name {
|
||||
id: "x",
|
||||
ctx: Load,
|
||||
},
|
||||
},
|
||||
conversion: 0,
|
||||
format_spec: None,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
|
@ -1,35 +1,79 @@
|
|||
use crate::{
|
||||
ast::{Constant, Expr, ExprKind, Location},
|
||||
error::{LexicalError, LexicalErrorType},
|
||||
fstring::parse_located_fstring,
|
||||
string_parser::parse_string,
|
||||
token::StringKind,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
|
||||
pub fn parse_strings(
|
||||
values: Vec<(Location, (String, StringKind), Location)>,
|
||||
values: Vec<(Location, (String, StringKind, bool), Location)>,
|
||||
) -> Result<Expr, LexicalError> {
|
||||
// Preserve the initial location and kind.
|
||||
let initial_start = values[0].0;
|
||||
let last_end = values.last().unwrap().2;
|
||||
let initial_kind = (values[0].1 .1 == StringKind::U).then(|| "u".to_owned());
|
||||
let initial_kind = (values[0].1 .1 == StringKind::Unicode).then(|| "u".to_owned());
|
||||
let has_fstring = values.iter().any(|(_, (_, kind, ..), _)| kind.is_fstring());
|
||||
let num_bytes = values
|
||||
.iter()
|
||||
.filter(|(_, (_, kind, ..), _)| kind.is_bytes())
|
||||
.count();
|
||||
let has_bytes = num_bytes > 0;
|
||||
|
||||
// Optimization: fast-track the common case of a single string.
|
||||
if matches!(&*values, [(_, (_, StringKind::Normal | StringKind::U), _)]) {
|
||||
let value = values.into_iter().last().unwrap().1 .0;
|
||||
if has_bytes && num_bytes < values.len() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(
|
||||
"cannot mix bytes and nonbytes literals".to_owned(),
|
||||
),
|
||||
location: initial_start,
|
||||
});
|
||||
}
|
||||
|
||||
if has_bytes {
|
||||
let mut content: Vec<u8> = vec![];
|
||||
for (start, (source, kind, triple_quoted), end) in values {
|
||||
for value in parse_string(&source, kind, triple_quoted, start, end)? {
|
||||
match value.node {
|
||||
ExprKind::Constant {
|
||||
value: Constant::Bytes(value),
|
||||
..
|
||||
} => content.extend(value),
|
||||
_ => unreachable!("Unexpected non-bytes expression."),
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(Expr::new(
|
||||
initial_start,
|
||||
last_end,
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(value),
|
||||
kind: initial_kind,
|
||||
value: Constant::Bytes(content),
|
||||
kind: None,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
// Determine whether the list of values contains any f-strings. (If not, we can return a
|
||||
// single Constant at the end, rather than a JoinedStr.)
|
||||
let mut has_fstring = false;
|
||||
if !has_fstring {
|
||||
let mut content: Vec<String> = vec![];
|
||||
for (start, (source, kind, triple_quoted), end) in values {
|
||||
for value in parse_string(&source, kind, triple_quoted, start, end)? {
|
||||
match value.node {
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(value),
|
||||
..
|
||||
} => content.push(value),
|
||||
_ => unreachable!("Unexpected non-string expression."),
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(Expr::new(
|
||||
initial_start,
|
||||
last_end,
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(content.join("")),
|
||||
kind: initial_kind,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
// De-duplicate adjacent constants.
|
||||
let mut deduped: Vec<Expr> = vec![];
|
||||
|
@ -46,17 +90,8 @@ pub fn parse_strings(
|
|||
)
|
||||
};
|
||||
|
||||
for (start, (string, string_kind), end) in values {
|
||||
match string_kind {
|
||||
StringKind::Normal | StringKind::U => current.push(string),
|
||||
StringKind::F => {
|
||||
has_fstring = true;
|
||||
for value in
|
||||
parse_located_fstring(&string, start, end).map_err(|e| LexicalError {
|
||||
location: start,
|
||||
error: LexicalErrorType::FStringError(e.error),
|
||||
})?
|
||||
{
|
||||
for (start, (source, kind, triple_quoted), end) in values {
|
||||
for value in parse_string(&source, kind, triple_quoted, start, end)? {
|
||||
match value.node {
|
||||
ExprKind::FormattedValue { .. } => {
|
||||
if !current.is_empty() {
|
||||
|
@ -64,19 +99,14 @@ pub fn parse_strings(
|
|||
}
|
||||
deduped.push(value)
|
||||
}
|
||||
ExprKind::Constant { value, .. } => {
|
||||
if let Constant::Str(value) = value {
|
||||
current.push(value);
|
||||
} else {
|
||||
unreachable!("Unexpected non-string constant.");
|
||||
}
|
||||
}
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(value),
|
||||
..
|
||||
} => current.push(value),
|
||||
_ => unreachable!("Unexpected non-string expression."),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !current.is_empty() {
|
||||
deduped.push(take_current(&mut current));
|
||||
}
|
||||
|
@ -101,64 +131,153 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_parse_string_concat() {
|
||||
let source = String::from("'Hello ' 'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' 'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_string_concat_1() {
|
||||
let source = String::from("'Hello ' u'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' u'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_string_concat_2() {
|
||||
let source = String::from("u'Hello ' 'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'Hello ' 'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_1() {
|
||||
let source = String::from("'Hello ' f'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' f'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_2() {
|
||||
let source = String::from("'Hello ' f'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' f'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_3() {
|
||||
let source = String::from("'Hello ' f'world{\"!\"}'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' f'world{\"!\"}'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_f_string_concat_1() {
|
||||
let source = String::from("u'Hello ' f'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'Hello ' f'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_f_string_concat_2() {
|
||||
let source = String::from("u'Hello ' f'world' '!'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'Hello ' f'world' '!'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_string_triple_quotes_with_kind() {
|
||||
let source = String::from("u'''Hello, world!'''");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'''Hello, world!'''";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_quoted_byte() {
|
||||
// single quote
|
||||
let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_double_quoted_byte() {
|
||||
// double quote
|
||||
let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_char_in_byte_literal() {
|
||||
// backslash does not escape
|
||||
let source = r##"b"omkmok\Xaa""##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal_1() {
|
||||
let source = r"rb'\x1z'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal_2() {
|
||||
let source = r"rb'\\'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_octet() {
|
||||
let source = r##"b'\43a\4\1234'"##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_escaped_newline() {
|
||||
let source = r#"f"\n{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_unescaped_newline() {
|
||||
let source = r#"f"""
|
||||
{x}""""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_escaped_character() {
|
||||
let source = r#"f"\\{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_fstring() {
|
||||
let source = r#"rf"{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_triple_quoted_raw_fstring() {
|
||||
let source = r#"rf"""{x}""""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_line_continuation() {
|
||||
let source = r#"rf"\
|
||||
{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
}
|
||||
|
|
562
parser/src/string_parser.rs
Normal file
562
parser/src/string_parser.rs
Normal file
|
@ -0,0 +1,562 @@
|
|||
use self::FStringErrorType::*;
|
||||
use crate::{
|
||||
ast::{Constant, ConversionFlag, Expr, ExprKind, Location},
|
||||
error::{FStringErrorType, LexicalError, LexicalErrorType, ParseError},
|
||||
parser::parse_expression_located,
|
||||
token::StringKind,
|
||||
};
|
||||
use std::{iter, mem, str};
|
||||
|
||||
/// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
|
||||
pub const MAX_UNICODE_NAME: usize = 88;
|
||||
|
||||
pub struct StringParser<'a> {
|
||||
chars: iter::Peekable<str::Chars<'a>>,
|
||||
kind: StringKind,
|
||||
str_start: Location,
|
||||
str_end: Location,
|
||||
location: Location,
|
||||
}
|
||||
|
||||
impl<'a> StringParser<'a> {
|
||||
pub fn new(
|
||||
source: &'a str,
|
||||
kind: StringKind,
|
||||
triple_quoted: bool,
|
||||
str_start: Location,
|
||||
str_end: Location,
|
||||
) -> Self {
|
||||
let offset = kind.to_string().len() + if triple_quoted { 3 } else { 1 };
|
||||
Self {
|
||||
chars: source.chars().peekable(),
|
||||
kind,
|
||||
str_start,
|
||||
str_end,
|
||||
location: Location::new(str_start.row(), str_start.column() + offset),
|
||||
}
|
||||
}
|
||||
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
let Some(c) = self.chars.next() else {
|
||||
return None
|
||||
};
|
||||
if c == '\n' {
|
||||
self.location.newline();
|
||||
} else {
|
||||
self.location.go_right();
|
||||
}
|
||||
Some(c)
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.chars.peek()
|
||||
}
|
||||
|
||||
fn get_pos(&self) -> Location {
|
||||
self.location
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn expr(&self, node: ExprKind) -> Expr {
|
||||
Expr::new(self.str_start, self.str_end, node)
|
||||
}
|
||||
|
||||
fn parse_unicode_literal(&mut self, literal_number: usize) -> Result<char, LexicalError> {
|
||||
let mut p: u32 = 0u32;
|
||||
let unicode_error = LexicalError {
|
||||
error: LexicalErrorType::UnicodeError,
|
||||
location: self.get_pos(),
|
||||
};
|
||||
for i in 1..=literal_number {
|
||||
match self.next_char() {
|
||||
Some(c) => match c.to_digit(16) {
|
||||
Some(d) => p += d << ((literal_number - i) * 4),
|
||||
None => return Err(unicode_error),
|
||||
},
|
||||
None => return Err(unicode_error),
|
||||
}
|
||||
}
|
||||
match p {
|
||||
0xD800..=0xDFFF => Ok(std::char::REPLACEMENT_CHARACTER),
|
||||
_ => std::char::from_u32(p).ok_or(unicode_error),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_octet(&mut self, first: char) -> char {
|
||||
let mut octet_content = String::new();
|
||||
octet_content.push(first);
|
||||
while octet_content.len() < 3 {
|
||||
if let Some('0'..='7') = self.peek() {
|
||||
octet_content.push(self.next_char().unwrap())
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let value = u32::from_str_radix(&octet_content, 8).unwrap();
|
||||
char::from_u32(value).unwrap()
|
||||
}
|
||||
|
||||
fn parse_unicode_name(&mut self) -> Result<char, LexicalError> {
|
||||
let start_pos = self.get_pos();
|
||||
match self.next_char() {
|
||||
Some('{') => {}
|
||||
_ => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: start_pos,
|
||||
})
|
||||
}
|
||||
}
|
||||
let start_pos = self.get_pos();
|
||||
let mut name = String::new();
|
||||
loop {
|
||||
match self.next_char() {
|
||||
Some('}') => break,
|
||||
Some(c) => name.push(c),
|
||||
None => {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: self.get_pos(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if name.len() > MAX_UNICODE_NAME {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::UnicodeError,
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
|
||||
unicode_names2::character(&name).ok_or(LexicalError {
|
||||
error: LexicalErrorType::UnicodeError,
|
||||
location: start_pos,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_escaped_char(&mut self) -> Result<String, LexicalError> {
|
||||
match self.next_char() {
|
||||
Some(c) => Ok(match c {
|
||||
'\\' => '\\'.to_string(),
|
||||
'\'' => '\''.to_string(),
|
||||
'\"' => '"'.to_string(),
|
||||
'\n' => "".to_string(),
|
||||
'a' => '\x07'.to_string(),
|
||||
'b' => '\x08'.to_string(),
|
||||
'f' => '\x0c'.to_string(),
|
||||
'n' => '\n'.to_string(),
|
||||
'r' => '\r'.to_string(),
|
||||
't' => '\t'.to_string(),
|
||||
'v' => '\x0b'.to_string(),
|
||||
o @ '0'..='7' => self.parse_octet(o).to_string(),
|
||||
'x' => self.parse_unicode_literal(2)?.to_string(),
|
||||
'u' if !self.kind.is_bytes() => self.parse_unicode_literal(4)?.to_string(),
|
||||
'U' if !self.kind.is_bytes() => self.parse_unicode_literal(8)?.to_string(),
|
||||
'N' if !self.kind.is_bytes() => self.parse_unicode_name()?.to_string(),
|
||||
c => {
|
||||
if self.kind.is_bytes() && !c.is_ascii() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters".to_owned(),
|
||||
),
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
format!("\\{c}")
|
||||
}
|
||||
}),
|
||||
None => Err(LexicalError {
|
||||
error: LexicalErrorType::StringError,
|
||||
location: self.get_pos(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_formatted_value(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
|
||||
let mut expression = String::new();
|
||||
let mut spec = None;
|
||||
let mut delims = Vec::new();
|
||||
let mut conversion = ConversionFlag::None;
|
||||
let mut self_documenting = false;
|
||||
let mut trailing_seq = String::new();
|
||||
let location = self.get_pos();
|
||||
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
// can be integrated better with the remaining code, but as a starting point ok
|
||||
// in general I would do here a tokenizing of the fstrings to omit this peeking.
|
||||
'!' if self.peek() == Some(&'=') => {
|
||||
expression.push_str("!=");
|
||||
self.next_char();
|
||||
}
|
||||
|
||||
'=' if self.peek() == Some(&'=') => {
|
||||
expression.push_str("==");
|
||||
self.next_char();
|
||||
}
|
||||
|
||||
'>' if self.peek() == Some(&'=') => {
|
||||
expression.push_str(">=");
|
||||
self.next_char();
|
||||
}
|
||||
|
||||
'<' if self.peek() == Some(&'=') => {
|
||||
expression.push_str("<=");
|
||||
self.next_char();
|
||||
}
|
||||
|
||||
'!' if delims.is_empty() && self.peek() != Some(&'=') => {
|
||||
if expression.trim().is_empty() {
|
||||
return Err(EmptyExpression.to_lexical_error(self.get_pos()));
|
||||
}
|
||||
|
||||
conversion = match self.next_char() {
|
||||
Some('s') => ConversionFlag::Str,
|
||||
Some('a') => ConversionFlag::Ascii,
|
||||
Some('r') => ConversionFlag::Repr,
|
||||
Some(_) => {
|
||||
return Err(if expression.trim().is_empty() {
|
||||
EmptyExpression.to_lexical_error(self.get_pos())
|
||||
} else {
|
||||
InvalidConversionFlag.to_lexical_error(self.get_pos())
|
||||
});
|
||||
}
|
||||
None => {
|
||||
return Err(if expression.trim().is_empty() {
|
||||
EmptyExpression.to_lexical_error(self.get_pos())
|
||||
} else {
|
||||
UnclosedLbrace.to_lexical_error(self.get_pos())
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(&peek) = self.peek() {
|
||||
if peek != '}' && peek != ':' {
|
||||
return Err(if expression.trim().is_empty() {
|
||||
EmptyExpression.to_lexical_error(self.get_pos())
|
||||
} else {
|
||||
UnclosedLbrace.to_lexical_error(self.get_pos())
|
||||
});
|
||||
}
|
||||
} else {
|
||||
return Err(if expression.trim().is_empty() {
|
||||
EmptyExpression.to_lexical_error(self.get_pos())
|
||||
} else {
|
||||
UnclosedLbrace.to_lexical_error(self.get_pos())
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// match a python 3.8 self documenting expression
|
||||
// format '{' PYTHON_EXPRESSION '=' FORMAT_SPECIFIER? '}'
|
||||
'=' if self.peek() != Some(&'=') && delims.is_empty() => {
|
||||
self_documenting = true;
|
||||
}
|
||||
|
||||
':' if delims.is_empty() => {
|
||||
let parsed_spec = self.parse_spec(nested)?;
|
||||
|
||||
spec = Some(Box::new(self.expr(ExprKind::JoinedStr {
|
||||
values: parsed_spec,
|
||||
})));
|
||||
}
|
||||
'(' | '{' | '[' => {
|
||||
expression.push(ch);
|
||||
delims.push(ch);
|
||||
}
|
||||
')' => {
|
||||
let last_delim = delims.pop();
|
||||
match last_delim {
|
||||
Some('(') => {
|
||||
expression.push(ch);
|
||||
}
|
||||
Some(c) => {
|
||||
return Err(
|
||||
MismatchedDelimiter(c, ')').to_lexical_error(self.get_pos())
|
||||
);
|
||||
}
|
||||
None => {
|
||||
return Err(Unmatched(')').to_lexical_error(self.get_pos()));
|
||||
}
|
||||
}
|
||||
}
|
||||
']' => {
|
||||
let last_delim = delims.pop();
|
||||
match last_delim {
|
||||
Some('[') => {
|
||||
expression.push(ch);
|
||||
}
|
||||
Some(c) => {
|
||||
return Err(
|
||||
MismatchedDelimiter(c, ']').to_lexical_error(self.get_pos())
|
||||
);
|
||||
}
|
||||
None => {
|
||||
return Err(Unmatched(']').to_lexical_error(self.get_pos()));
|
||||
}
|
||||
}
|
||||
}
|
||||
'}' if !delims.is_empty() => {
|
||||
let last_delim = delims.pop();
|
||||
match last_delim {
|
||||
Some('{') => {
|
||||
expression.push(ch);
|
||||
}
|
||||
Some(c) => {
|
||||
return Err(MismatchedDelimiter(c, '}').to_lexical_error(self.get_pos()))
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
'}' => {
|
||||
if expression.trim().is_empty() {
|
||||
return Err(EmptyExpression.to_lexical_error(self.get_pos()));
|
||||
}
|
||||
|
||||
let ret = if !self_documenting {
|
||||
vec![self.expr(ExprKind::FormattedValue {
|
||||
value: Box::new(parse_fstring_expr(&expression, location).map_err(
|
||||
|e| {
|
||||
InvalidExpression(Box::new(e.error))
|
||||
.to_lexical_error(self.get_pos())
|
||||
},
|
||||
)?),
|
||||
conversion: conversion as _,
|
||||
format_spec: spec,
|
||||
})]
|
||||
} else {
|
||||
vec![
|
||||
self.expr(ExprKind::Constant {
|
||||
value: Constant::Str(expression.to_owned() + "="),
|
||||
kind: None,
|
||||
}),
|
||||
self.expr(ExprKind::Constant {
|
||||
value: trailing_seq.into(),
|
||||
kind: None,
|
||||
}),
|
||||
self.expr(ExprKind::FormattedValue {
|
||||
value: Box::new(
|
||||
parse_fstring_expr(&expression, location).map_err(|e| {
|
||||
InvalidExpression(Box::new(e.error))
|
||||
.to_lexical_error(self.get_pos())
|
||||
})?,
|
||||
),
|
||||
conversion: (if conversion == ConversionFlag::None && spec.is_none()
|
||||
{
|
||||
ConversionFlag::Repr
|
||||
} else {
|
||||
conversion
|
||||
}) as _,
|
||||
format_spec: spec,
|
||||
}),
|
||||
]
|
||||
};
|
||||
return Ok(ret);
|
||||
}
|
||||
'"' | '\'' => {
|
||||
expression.push(ch);
|
||||
loop {
|
||||
let Some(c) = self.next_char() else {
|
||||
return Err(UnterminatedString.to_lexical_error(self.get_pos()));
|
||||
};
|
||||
expression.push(c);
|
||||
if c == ch {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
' ' if self_documenting => {
|
||||
trailing_seq.push(ch);
|
||||
}
|
||||
'\\' => return Err(ExpressionCannotInclude('\\').to_lexical_error(self.get_pos())),
|
||||
_ => {
|
||||
if self_documenting {
|
||||
return Err(UnclosedLbrace.to_lexical_error(self.get_pos()));
|
||||
}
|
||||
|
||||
expression.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(if expression.trim().is_empty() {
|
||||
EmptyExpression.to_lexical_error(self.get_pos())
|
||||
} else {
|
||||
UnclosedLbrace.to_lexical_error(self.get_pos())
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_spec(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
|
||||
let mut spec_constructor = Vec::new();
|
||||
let mut constant_piece = String::new();
|
||||
while let Some(&next) = self.peek() {
|
||||
match next {
|
||||
'{' => {
|
||||
if !constant_piece.is_empty() {
|
||||
spec_constructor.push(self.expr(ExprKind::Constant {
|
||||
value: constant_piece.to_owned().into(),
|
||||
kind: None,
|
||||
}));
|
||||
constant_piece.clear();
|
||||
}
|
||||
let parsed_expr = self.parse_fstring(nested + 1)?;
|
||||
spec_constructor.extend(parsed_expr);
|
||||
continue;
|
||||
}
|
||||
'}' => {
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
constant_piece.push(next);
|
||||
}
|
||||
}
|
||||
self.next_char();
|
||||
}
|
||||
if !constant_piece.is_empty() {
|
||||
spec_constructor.push(self.expr(ExprKind::Constant {
|
||||
value: constant_piece.to_owned().into(),
|
||||
kind: None,
|
||||
}));
|
||||
constant_piece.clear();
|
||||
}
|
||||
Ok(spec_constructor)
|
||||
}
|
||||
|
||||
fn parse_fstring(&mut self, nested: u8) -> Result<Vec<Expr>, LexicalError> {
|
||||
if nested >= 2 {
|
||||
return Err(ExpressionNestedTooDeeply.to_lexical_error(self.get_pos()));
|
||||
}
|
||||
|
||||
let mut content = String::new();
|
||||
let mut values = vec![];
|
||||
|
||||
while let Some(&ch) = self.peek() {
|
||||
match ch {
|
||||
'{' => {
|
||||
self.next_char();
|
||||
if nested == 0 {
|
||||
match self.peek() {
|
||||
Some('{') => {
|
||||
self.next_char();
|
||||
content.push('{');
|
||||
continue;
|
||||
}
|
||||
None => return Err(UnclosedLbrace.to_lexical_error(self.get_pos())),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if !content.is_empty() {
|
||||
values.push(self.expr(ExprKind::Constant {
|
||||
value: mem::take(&mut content).into(),
|
||||
kind: None,
|
||||
}));
|
||||
}
|
||||
|
||||
let parsed_values = self.parse_formatted_value(nested)?;
|
||||
values.extend(parsed_values);
|
||||
}
|
||||
'}' => {
|
||||
if nested > 0 {
|
||||
break;
|
||||
}
|
||||
self.next_char();
|
||||
if let Some('}') = self.peek() {
|
||||
self.next_char();
|
||||
content.push('}');
|
||||
} else {
|
||||
return Err(SingleRbrace.to_lexical_error(self.get_pos()));
|
||||
}
|
||||
}
|
||||
'\\' if !self.kind.is_raw() => {
|
||||
self.next_char();
|
||||
content.push_str(&self.parse_escaped_char()?);
|
||||
}
|
||||
_ => {
|
||||
content.push(ch);
|
||||
self.next_char();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !content.is_empty() {
|
||||
values.push(self.expr(ExprKind::Constant {
|
||||
value: content.into(),
|
||||
kind: None,
|
||||
}))
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn parse_bytes(&mut self) -> Result<Expr, LexicalError> {
|
||||
let mut content = String::new();
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
'\\' if !self.kind.is_raw() => {
|
||||
content.push_str(&self.parse_escaped_char()?);
|
||||
}
|
||||
ch => {
|
||||
if !ch.is_ascii() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(
|
||||
"bytes can only contain ASCII literal characters".to_string(),
|
||||
),
|
||||
location: self.get_pos(),
|
||||
});
|
||||
}
|
||||
content.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self.expr(ExprKind::Constant {
|
||||
value: Constant::Bytes(content.chars().map(|c| c as u8).collect()),
|
||||
kind: None,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn parse_string(&mut self) -> Result<Expr, LexicalError> {
|
||||
let mut content = String::new();
|
||||
while let Some(ch) = self.next_char() {
|
||||
match ch {
|
||||
'\\' if !self.kind.is_raw() => {
|
||||
content.push_str(&self.parse_escaped_char()?);
|
||||
}
|
||||
ch => content.push(ch),
|
||||
}
|
||||
}
|
||||
Ok(self.expr(ExprKind::Constant {
|
||||
value: Constant::Str(content),
|
||||
kind: self.kind.is_unicode().then(|| "u".to_string()),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Parse the literal body according to its prefix kind, dispatching to
/// the f-string, bytes, or plain-string parser.
///
/// F-strings may expand to several expressions (constant and formatted
/// parts); the other kinds always yield exactly one.
pub fn parse(&mut self) -> Result<Vec<Expr>, LexicalError> {
    if self.kind.is_fstring() {
        return self.parse_fstring(0);
    }
    let expr = if self.kind.is_bytes() {
        self.parse_bytes()?
    } else {
        self.parse_string()?
    };
    Ok(vec![expr])
}
|
||||
}
|
||||
|
||||
fn parse_fstring_expr(source: &str, location: Location) -> Result<Expr, ParseError> {
|
||||
let fstring_body = format!("({source})");
|
||||
parse_expression_located(
|
||||
&fstring_body,
|
||||
"<fstring>",
|
||||
Location::new(location.row(), location.column() - 1),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parse_string(
|
||||
source: &str,
|
||||
kind: StringKind,
|
||||
triple_quoted: bool,
|
||||
start: Location,
|
||||
end: Location,
|
||||
) -> Result<Vec<Expr>, LexicalError> {
|
||||
StringParser::new(source, kind, triple_quoted, start, end).parse()
|
||||
}
|
|
@ -1,17 +1,29 @@
|
|||
//! Different token definitions.
|
||||
//! Loosely based on token.h from CPython source:
|
||||
use num_bigint::BigInt;
|
||||
use std::fmt::{self, Write};
|
||||
use std::fmt;
|
||||
|
||||
/// Python source code can be tokenized in a sequence of these tokens.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Tok {
|
||||
Name { name: String },
|
||||
Int { value: BigInt },
|
||||
Float { value: f64 },
|
||||
Complex { real: f64, imag: f64 },
|
||||
String { value: String, kind: StringKind },
|
||||
Bytes { value: Vec<u8> },
|
||||
Name {
|
||||
name: String,
|
||||
},
|
||||
Int {
|
||||
value: BigInt,
|
||||
},
|
||||
Float {
|
||||
value: f64,
|
||||
},
|
||||
Complex {
|
||||
real: f64,
|
||||
imag: f64,
|
||||
},
|
||||
String {
|
||||
value: String,
|
||||
kind: StringKind,
|
||||
triple_quoted: bool,
|
||||
},
|
||||
Newline,
|
||||
Indent,
|
||||
Dedent,
|
||||
|
@ -107,13 +119,6 @@ pub enum Tok {
|
|||
Yield,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone)]
|
||||
pub enum StringKind {
|
||||
Normal,
|
||||
F,
|
||||
U,
|
||||
}
|
||||
|
||||
impl fmt::Display for Tok {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use Tok::*;
|
||||
|
@ -122,26 +127,13 @@ impl fmt::Display for Tok {
|
|||
Int { value } => write!(f, "'{value}'"),
|
||||
Float { value } => write!(f, "'{value}'"),
|
||||
Complex { real, imag } => write!(f, "{real}j{imag}"),
|
||||
String { value, kind } => {
|
||||
match kind {
|
||||
StringKind::F => f.write_str("f")?,
|
||||
StringKind::U => f.write_str("u")?,
|
||||
StringKind::Normal => {}
|
||||
}
|
||||
write!(f, "{value:?}")
|
||||
}
|
||||
Bytes { value } => {
|
||||
write!(f, "b\"")?;
|
||||
for i in value {
|
||||
match i {
|
||||
9 => f.write_str("\\t")?,
|
||||
10 => f.write_str("\\n")?,
|
||||
13 => f.write_str("\\r")?,
|
||||
32..=126 => f.write_char(*i as char)?,
|
||||
_ => write!(f, "\\x{i:02x}")?,
|
||||
}
|
||||
}
|
||||
f.write_str("\"")
|
||||
String {
|
||||
value,
|
||||
kind,
|
||||
triple_quoted,
|
||||
} => {
|
||||
let quotes = "\"".repeat(if *triple_quoted { 3 } else { 1 });
|
||||
write!(f, "{kind}{quotes}{value}{quotes}")
|
||||
}
|
||||
Newline => f.write_str("Newline"),
|
||||
Indent => f.write_str("Indent"),
|
||||
|
@ -236,3 +228,50 @@ impl fmt::Display for Tok {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of a string literal, as determined by its prefix
/// (`f"..."`, `b"..."`, `r"..."`, `rb"..."`, `u"..."`, ...).
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum StringKind {
    /// A plain string literal with no prefix.
    String,
    /// An f-string (`f` prefix).
    FString,
    /// A bytes literal (`b` prefix).
    Bytes,
    /// A raw string (`r` prefix); escape sequences are left as-is.
    RawString,
    /// A raw f-string (`rf`/`fr` prefix).
    RawFString,
    /// A raw bytes literal (`rb`/`br` prefix).
    RawBytes,
    /// A string with the `u` prefix.
    Unicode,
}
|
||||
|
||||
impl fmt::Display for StringKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use StringKind::*;
|
||||
match self {
|
||||
String => f.write_str(""),
|
||||
FString => f.write_str("f"),
|
||||
Bytes => f.write_str("b"),
|
||||
RawString => f.write_str("r"),
|
||||
RawFString => f.write_str("rf"),
|
||||
RawBytes => f.write_str("rb"),
|
||||
Unicode => f.write_str("u"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StringKind {
|
||||
pub fn is_raw(&self) -> bool {
|
||||
use StringKind::{RawBytes, RawFString, RawString};
|
||||
matches!(self, RawString | RawFString | RawBytes)
|
||||
}
|
||||
|
||||
pub fn is_fstring(&self) -> bool {
|
||||
use StringKind::{FString, RawFString};
|
||||
matches!(self, FString | RawFString)
|
||||
}
|
||||
|
||||
pub fn is_bytes(&self) -> bool {
|
||||
use StringKind::{Bytes, RawBytes};
|
||||
matches!(self, Bytes | RawBytes)
|
||||
}
|
||||
|
||||
pub fn is_unicode(&self) -> bool {
|
||||
matches!(self, StringKind::Unicode)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue