Add an implicit concatenation flag to string and bytes constants (#6512)

## Summary

Per the discussion in
https://github.com/astral-sh/ruff/discussions/6183, this PR adds an
`implicit_concatenated` flag to the string and bytes constant variants.
It's not actually _used_ anywhere as of this PR, but it is covered by
the tests.

Specifically, the string and bytes cases now each wrap a dedicated
struct, and the `Expr::FString` node gains the same
`implicit_concatenated` field. Each struct holds the value, plus the
flag:

```rust
#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum Constant {
    Str(StringConstant),
    Bytes(BytesConstant),
    ...
}

/// A string constant: the parsed value paired with a flag recording whether it was written as
/// several implicitly concatenated string tokens.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StringConstant {
    /// The string value as resolved by the parser (i.e., without quotes, or escape sequences, or
    /// implicit concatenations).
    pub value: String,
    /// Whether the string contains multiple string tokens that were implicitly concatenated.
    pub implicit_concatenated: bool,
}

/// Dereferences to the inner `str`, so `&StringConstant` coerces to `&str` and string methods
/// can be called on the constant directly.
impl Deref for StringConstant {
    type Target = str;
    fn deref(&self) -> &Self::Target {
        self.value.as_str()
    }
}

/// A bytes constant: the parsed value paired with a flag recording whether it was written as
/// several implicitly concatenated bytes tokens.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BytesConstant {
    /// The bytes value as resolved by the parser (i.e., without quotes, or escape sequences, or
    /// implicit concatenations).
    pub value: Vec<u8>,
    /// Whether the bytes literal contains multiple bytes tokens that were implicitly concatenated.
    pub implicit_concatenated: bool,
}

/// Dereferences to the inner byte slice, so `&BytesConstant` coerces to `&[u8]` and slice
/// methods can be called on the constant directly.
impl Deref for BytesConstant {
    type Target = [u8];
    fn deref(&self) -> &Self::Target {
        self.value.as_slice()
    }
}
```

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-08-14 09:46:54 -04:00 committed by GitHub
parent fc0c9507d0
commit f16e780e0a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
88 changed files with 1252 additions and 761 deletions

View file

@ -1,6 +1,4 @@
use itertools::Itertools;
use ruff_python_ast::{self as ast, Constant, Expr};
use ruff_python_ast::{self as ast, BytesConstant, Constant, Expr, StringConstant};
use ruff_python_ast::{ConversionFlag, Ranged};
use ruff_text_size::{TextLen, TextRange, TextSize};
@ -245,6 +243,7 @@ impl<'a> StringParser<'a> {
spec = Some(Box::new(Expr::from(ast::ExprFString {
values: parsed_spec,
implicit_concatenated: false,
range: self.range(start_location),
})));
}
@ -513,25 +512,25 @@ impl<'a> StringParser<'a> {
}
Ok(Expr::from(ast::ExprConstant {
value: Constant::Bytes(content.chars().map(|c| c as u8).collect()),
value: content.chars().map(|c| c as u8).collect::<Vec<u8>>().into(),
kind: None,
range: self.range(start_location),
}))
}
fn parse_string(&mut self) -> Result<Expr, LexicalError> {
let mut content = String::new();
let mut value = String::new();
let start_location = self.get_pos();
while let Some(ch) = self.next_char() {
match ch {
'\\' if !self.kind.is_raw() => {
content.push_str(&self.parse_escaped_char()?);
value.push_str(&self.parse_escaped_char()?);
}
ch => content.push(ch),
ch => value.push(ch),
}
}
Ok(Expr::from(ast::ExprConstant {
value: Constant::Str(content),
value: value.into(),
kind: self.kind.is_unicode().then(|| "u".to_string()),
range: self.range(start_location),
}))
@ -577,6 +576,7 @@ pub(crate) fn parse_strings(
.filter(|(_, (_, kind, ..), _)| kind.is_any_bytes())
.count();
let has_bytes = num_bytes > 0;
let implicit_concatenated = values.len() > 1;
if has_bytes && num_bytes < values.len() {
return Err(LexicalError {
@ -593,7 +593,7 @@ pub(crate) fn parse_strings(
for value in parse_string(&source, kind, triple_quoted, start)? {
match value {
Expr::Constant(ast::ExprConstant {
value: Constant::Bytes(value),
value: Constant::Bytes(BytesConstant { value, .. }),
..
}) => content.extend(value),
_ => unreachable!("Unexpected non-bytes expression."),
@ -601,7 +601,10 @@ pub(crate) fn parse_strings(
}
}
return Ok(ast::ExprConstant {
value: Constant::Bytes(content),
value: Constant::Bytes(BytesConstant {
value: content,
implicit_concatenated,
}),
kind: None,
range: TextRange::new(initial_start, last_end),
}
@ -614,7 +617,7 @@ pub(crate) fn parse_strings(
for value in parse_string(&source, kind, triple_quoted, start)? {
match value {
Expr::Constant(ast::ExprConstant {
value: Constant::Str(value),
value: Constant::Str(StringConstant { value, .. }),
..
}) => content.push(value),
_ => unreachable!("Unexpected non-string expression."),
@ -622,7 +625,10 @@ pub(crate) fn parse_strings(
}
}
return Ok(ast::ExprConstant {
value: Constant::Str(content.join("")),
value: Constant::Str(StringConstant {
value: content.join(""),
implicit_concatenated,
}),
kind: initial_kind,
range: TextRange::new(initial_start, last_end),
}
@ -637,7 +643,10 @@ pub(crate) fn parse_strings(
let take_current = |current: &mut Vec<String>, start, end| -> Expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Str(current.drain(..).join("")),
value: Constant::Str(StringConstant {
value: current.drain(..).collect::<String>(),
implicit_concatenated,
}),
kind: initial_kind.clone(),
range: TextRange::new(start, end),
})
@ -654,14 +663,14 @@ pub(crate) fn parse_strings(
deduped.push(value);
}
Expr::Constant(ast::ExprConstant {
value: Constant::Str(inner),
value: Constant::Str(StringConstant { value, .. }),
..
}) => {
if current.is_empty() {
current_start = value_range.start();
}
current_end = value_range.end();
current.push(inner);
current.push(value);
}
_ => unreachable!("Unexpected non-string expression."),
}
@ -673,6 +682,7 @@ pub(crate) fn parse_strings(
Ok(Expr::FString(ast::ExprFString {
values: deduped,
implicit_concatenated,
range: TextRange::new(initial_start, last_end),
}))
}
@ -963,6 +973,13 @@ mod tests {
insta::assert_debug_snapshot!(parse_ast);
}
// Snapshot test: a plain string, an f-string containing a replacement field (`{"!"}`), and a
// trailing plain string, all written adjacently so they are implicitly concatenated.
#[test]
fn test_parse_f_string_concat_4() {
let source = "'Hello ' f'world{\"!\"}' 'again!'";
let parse_ast = parse_suite(source, "<test>").unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_u_f_string_concat_1() {
let source = "u'Hello ' f'world'";
@ -1080,6 +1097,22 @@ mod tests {
insta::assert_debug_snapshot!(parse_ast);
}
// Snapshot test: a replacement field whose format spec is itself a nested replacement field
// holding a single empty string literal.
#[test]
fn test_parse_fstring_nested_string_spec() {
let source = "{foo:{''}}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
// Snapshot test: same shape as the nested-spec case, but the nested replacement field holds
// two adjacent empty string literals, i.e. an implicit concatenation inside a format spec.
#[test]
fn test_parse_fstring_nested_concatenation_string_spec() {
let source = "{foo:{'' ''}}";
let parse_ast = parse_fstring(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
macro_rules! test_aliases_parse {
($($name:ident: $alias:expr,)*) => {
$(