mirror of
https://github.com/RustPython/Parser.git
synced 2025-08-30 15:18:02 +00:00
Fix FormattedValue location
This commit is contained in:
parent
201d08583a
commit
439298e735
22 changed files with 2195 additions and 355 deletions
|
@ -1,35 +1,79 @@
|
|||
use crate::{
|
||||
ast::{Constant, Expr, ExprKind, Location},
|
||||
error::{LexicalError, LexicalErrorType},
|
||||
fstring::parse_located_fstring,
|
||||
string_parser::parse_string,
|
||||
token::StringKind,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
|
||||
pub fn parse_strings(
|
||||
values: Vec<(Location, (String, StringKind), Location)>,
|
||||
values: Vec<(Location, (String, StringKind, bool), Location)>,
|
||||
) -> Result<Expr, LexicalError> {
|
||||
// Preserve the initial location and kind.
|
||||
let initial_start = values[0].0;
|
||||
let last_end = values.last().unwrap().2;
|
||||
let initial_kind = (values[0].1 .1 == StringKind::U).then(|| "u".to_owned());
|
||||
let initial_kind = (values[0].1 .1 == StringKind::Unicode).then(|| "u".to_owned());
|
||||
let has_fstring = values.iter().any(|(_, (_, kind, ..), _)| kind.is_fstring());
|
||||
let num_bytes = values
|
||||
.iter()
|
||||
.filter(|(_, (_, kind, ..), _)| kind.is_bytes())
|
||||
.count();
|
||||
let has_bytes = num_bytes > 0;
|
||||
|
||||
// Optimization: fast-track the common case of a single string.
|
||||
if matches!(&*values, [(_, (_, StringKind::Normal | StringKind::U), _)]) {
|
||||
let value = values.into_iter().last().unwrap().1 .0;
|
||||
if has_bytes && num_bytes < values.len() {
|
||||
return Err(LexicalError {
|
||||
error: LexicalErrorType::OtherError(
|
||||
"cannot mix bytes and nonbytes literals".to_owned(),
|
||||
),
|
||||
location: initial_start,
|
||||
});
|
||||
}
|
||||
|
||||
if has_bytes {
|
||||
let mut content: Vec<u8> = vec![];
|
||||
for (start, (source, kind, triple_quoted), end) in values {
|
||||
for value in parse_string(&source, kind, triple_quoted, start, end)? {
|
||||
match value.node {
|
||||
ExprKind::Constant {
|
||||
value: Constant::Bytes(value),
|
||||
..
|
||||
} => content.extend(value),
|
||||
_ => unreachable!("Unexpected non-bytes expression."),
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(Expr::new(
|
||||
initial_start,
|
||||
last_end,
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(value),
|
||||
kind: initial_kind,
|
||||
value: Constant::Bytes(content),
|
||||
kind: None,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
// Determine whether the list of values contains any f-strings. (If not, we can return a
|
||||
// single Constant at the end, rather than a JoinedStr.)
|
||||
let mut has_fstring = false;
|
||||
if !has_fstring {
|
||||
let mut content: Vec<String> = vec![];
|
||||
for (start, (source, kind, triple_quoted), end) in values {
|
||||
for value in parse_string(&source, kind, triple_quoted, start, end)? {
|
||||
match value.node {
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(value),
|
||||
..
|
||||
} => content.push(value),
|
||||
_ => unreachable!("Unexpected non-string expression."),
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(Expr::new(
|
||||
initial_start,
|
||||
last_end,
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(content.join("")),
|
||||
kind: initial_kind,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
// De-duplicate adjacent constants.
|
||||
let mut deduped: Vec<Expr> = vec![];
|
||||
|
@ -46,34 +90,20 @@ pub fn parse_strings(
|
|||
)
|
||||
};
|
||||
|
||||
for (start, (string, string_kind), end) in values {
|
||||
match string_kind {
|
||||
StringKind::Normal | StringKind::U => current.push(string),
|
||||
StringKind::F => {
|
||||
has_fstring = true;
|
||||
for value in
|
||||
parse_located_fstring(&string, start, end).map_err(|e| LexicalError {
|
||||
location: start,
|
||||
error: LexicalErrorType::FStringError(e.error),
|
||||
})?
|
||||
{
|
||||
match value.node {
|
||||
ExprKind::FormattedValue { .. } => {
|
||||
if !current.is_empty() {
|
||||
deduped.push(take_current(&mut current));
|
||||
}
|
||||
deduped.push(value)
|
||||
}
|
||||
ExprKind::Constant { value, .. } => {
|
||||
if let Constant::Str(value) = value {
|
||||
current.push(value);
|
||||
} else {
|
||||
unreachable!("Unexpected non-string constant.");
|
||||
}
|
||||
}
|
||||
_ => unreachable!("Unexpected non-string expression."),
|
||||
for (start, (source, kind, triple_quoted), end) in values {
|
||||
for value in parse_string(&source, kind, triple_quoted, start, end)? {
|
||||
match value.node {
|
||||
ExprKind::FormattedValue { .. } => {
|
||||
if !current.is_empty() {
|
||||
deduped.push(take_current(&mut current));
|
||||
}
|
||||
deduped.push(value)
|
||||
}
|
||||
ExprKind::Constant {
|
||||
value: Constant::Str(value),
|
||||
..
|
||||
} => current.push(value),
|
||||
_ => unreachable!("Unexpected non-string expression."),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -101,64 +131,153 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_parse_string_concat() {
|
||||
let source = String::from("'Hello ' 'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' 'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_string_concat_1() {
|
||||
let source = String::from("'Hello ' u'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' u'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_string_concat_2() {
|
||||
let source = String::from("u'Hello ' 'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'Hello ' 'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_1() {
|
||||
let source = String::from("'Hello ' f'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' f'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_2() {
|
||||
let source = String::from("'Hello ' f'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' f'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_f_string_concat_3() {
|
||||
let source = String::from("'Hello ' f'world{\"!\"}'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "'Hello ' f'world{\"!\"}'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_f_string_concat_1() {
|
||||
let source = String::from("u'Hello ' f'world'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'Hello ' f'world'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_u_f_string_concat_2() {
|
||||
let source = String::from("u'Hello ' f'world' '!'");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'Hello ' f'world' '!'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_string_triple_quotes_with_kind() {
|
||||
let source = String::from("u'''Hello, world!'''");
|
||||
let parse_ast = parse_program(&source, "<test>").unwrap();
|
||||
let source = "u'''Hello, world!'''";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_single_quoted_byte() {
|
||||
// single quote
|
||||
let source = r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_double_quoted_byte() {
|
||||
// double quote
|
||||
let source = r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_char_in_byte_literal() {
|
||||
// backslash does not escape
|
||||
let source = r##"b"omkmok\Xaa""##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal_1() {
|
||||
let source = r"rb'\x1z'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_byte_literal_2() {
|
||||
let source = r"rb'\\'";
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_octet() {
|
||||
let source = r##"b'\43a\4\1234'"##;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_escaped_newline() {
|
||||
let source = r#"f"\n{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_unescaped_newline() {
|
||||
let source = r#"f"""
|
||||
{x}""""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_escaped_character() {
|
||||
let source = r#"f"\\{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_raw_fstring() {
|
||||
let source = r#"rf"{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_triple_quoted_raw_fstring() {
|
||||
let source = r#"rf"""{x}""""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fstring_line_continuation() {
|
||||
let source = r#"rf"\
|
||||
{x}""#;
|
||||
let parse_ast = parse_program(source, "<test>").unwrap();
|
||||
insta::assert_debug_snapshot!(parse_ast);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue