Add support for nested replacements inside format specifications (#6616)

Closes https://github.com/astral-sh/ruff/issues/6442

Python string formatting like `"hello {place}".format(place="world")`
supports format specifications for replaced content such as `"hello
{place:>10}".format(place="world")` which will align the text to the
right in a container filled up to ten characters.

Ruff parses formatted strings into `FormatPart`s each of which is either
a `Field` (content in `{...}`) or a `Literal` (the normal content).
Fields are parsed into name and format specifier sections (we'll ignore
conversion specifiers for now).

There are a myriad of specifiers that can be used in a `FormatSpec`.
Unfortunately for linters, the specifier values can be dynamically set.
For example, `"hello {place:{align}{width}}".format(place="world",
align=">", width=10)` and `"hello {place:{fmt}}".format(place="world",
fmt=">10")` will yield the same string as before but variables can be
used to determine the formatting. In this case, when parsing the format
specifier we can't know what _kind_ of specifier is being used as their
meaning is determined by both position and value.

Ruff does not support nested replacements and our current data model
does not support the concept. Here the data model is updated to support
this concept, although linting of specifications with replacements will
be inherently limited. We could split format specifications into two
types, one without any replacements that we can perform lints with and
one with replacements that we cannot inspect. However, it seems
excessive to drop all parsing of format specifiers due to the presence
of a replacement. Instead, I've opted to parse replacements eagerly and
ignore their possible effect on other format specifiers. This will allow
us to retain a simple interface for `FormatSpec` and most syntax checks.
We may need to add some handling to relax errors if a replacement was
seen previously.

It's worth noting that the nested replacement _can_ also include a
format specification although it may fail at runtime if you produce an
invalid outer format specification. For example, `"hello
{place:{fmt:<2}}".format(place="world", fmt=">10")` is valid so we need
to represent each nested replacement as a full `FormatPart`.

## Test plan

Adding unit tests for `FormatSpec` parsing and snapshots for PLE1300
This commit is contained in:
Zanie Blue 2023-08-17 09:07:30 -05:00 committed by GitHub
parent 1334232168
commit d0f2a8e424
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 266 additions and 39 deletions

View file

@ -14,7 +14,10 @@
"{:s} {:y}".format("hello", "world") # [bad-format-character] "{:s} {:y}".format("hello", "world") # [bad-format-character]
"{:*^30s}".format("centered") "{:*^30s}".format("centered") # OK
"{:{s}}".format("hello", s="s") # OK (nested replacement value not checked)
"{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked)
## f-strings ## f-strings

View file

@ -11,7 +11,9 @@ pub(crate) fn error_to_string(err: &FormatParseError) -> String {
FormatParseError::InvalidCharacterAfterRightBracket => { FormatParseError::InvalidCharacterAfterRightBracket => {
"Only '.' or '[' may follow ']' in format field specifier" "Only '.' or '[' may follow ']' in format field specifier"
} }
FormatParseError::InvalidFormatSpecifier => "Max string recursion exceeded", FormatParseError::PlaceholderRecursionExceeded => {
"Max format placeholder recursion exceeded"
}
FormatParseError::MissingStartBracket => "Single '}' encountered in format string", FormatParseError::MissingStartBracket => "Single '}' encountered in format string",
FormatParseError::MissingRightBracket => "Expected '}' before end of string", FormatParseError::MissingRightBracket => "Expected '}' before end of string",
FormatParseError::UnmatchedBracket => "Single '{' encountered in format string", FormatParseError::UnmatchedBracket => "Single '{' encountered in format string",

View file

@ -28,7 +28,7 @@ F521.py:3:1: F521 `.format` call has invalid format string: Expected '}' before
5 | "{:{:{}}}".format(1, 2, 3) 5 | "{:{:{}}}".format(1, 2, 3)
| |
F521.py:5:1: F521 `.format` call has invalid format string: Max string recursion exceeded F521.py:5:1: F521 `.format` call has invalid format string: Max format placeholder recursion exceeded
| |
3 | "{foo[}".format(foo=1) 3 | "{foo[}".format(foo=1)
4 | # too much string recursion (placeholder-in-placeholder) 4 | # too much string recursion (placeholder-in-placeholder)

View file

@ -49,16 +49,39 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
continue; continue;
}; };
if let Err(FormatSpecError::InvalidFormatType) = FormatSpec::parse(format_spec) { match FormatSpec::parse(format_spec) {
checker.diagnostics.push(Diagnostic::new( Err(FormatSpecError::InvalidFormatType) => {
BadStringFormatCharacter { checker.diagnostics.push(Diagnostic::new(
// The format type character is always the last one. BadStringFormatCharacter {
// More info in the official spec: // The format type character is always the last one.
// https://docs.python.org/3/library/string.html#format-specification-mini-language // More info in the official spec:
format_char: format_spec.chars().last().unwrap(), // https://docs.python.org/3/library/string.html#format-specification-mini-language
}, format_char: format_spec.chars().last().unwrap(),
range, },
)); range,
));
}
Err(_) => {}
Ok(format_spec) => {
for replacement in format_spec.replacements() {
let FormatPart::Field { format_spec, .. } = replacement else {
continue;
};
if let Err(FormatSpecError::InvalidFormatType) =
FormatSpec::parse(format_spec)
{
checker.diagnostics.push(Diagnostic::new(
BadStringFormatCharacter {
// The format type character is always the last one.
// More info in the official spec:
// https://docs.python.org/3/library/string.html#format-specification-mini-language
format_char: format_spec.chars().last().unwrap(),
},
range,
));
}
}
}
} }
} }
} }

View file

@ -48,7 +48,17 @@ bad_string_format_character.py:15:1: PLE1300 Unsupported format character 'y'
15 | "{:s} {:y}".format("hello", "world") # [bad-format-character] 15 | "{:s} {:y}".format("hello", "world") # [bad-format-character]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
16 | 16 |
17 | "{:*^30s}".format("centered") 17 | "{:*^30s}".format("centered") # OK
|
bad_string_format_character.py:20:1: PLE1300 Unsupported format character 'y'
|
18 | "{:{s}}".format("hello", s="s") # OK (nested replacement value not checked)
19 |
20 | "{:{s:y}}".format("hello", s="s") # [bad-format-character] (nested replacement format spec checked)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLE1300
21 |
22 | ## f-strings
| |

View file

@ -183,6 +183,35 @@ impl FormatParse for FormatType {
} }
} }
/// The format specification component of a format field
///
/// For example the content would be parsed from `<20` in:
/// ```python
/// "hello {name:<20}".format(name="test")
/// ```
///
/// Format specifications allow nested replacements for dynamic formatting.
/// For example, the following statements are equivalent:
/// ```python
/// "hello {name:{fmt}}".format(name="test", fmt="<20")
/// "hello {name:{align}{width}}".format(name="test", align="<", width="20")
/// "hello {name:<20{empty}>}".format(name="test", empty="")
/// ```
///
/// Nested replacements can include additional format specifiers.
/// ```python
/// "hello {name:{fmt:*>}}".format(name="test", fmt="<20")
/// ```
///
/// However, replacements can only be singly nested (preserving our sanity).
/// A [`FormatSpecError::PlaceholderRecursionExceeded`] will be raised while parsing in this case.
/// ```python
/// "hello {name:{fmt:{not_allowed}}}".format(name="test", fmt="<20") # Syntax error
/// ```
///
/// When replacements are present in a format specification, we will parse them and
/// store them in [`FormatSpec`] but will otherwise ignore them if they would introduce
/// an invalid format specification at runtime.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct FormatSpec { pub struct FormatSpec {
// Ex) `!s` in `'{!s}'` // Ex) `!s` in `'{!s}'`
@ -203,6 +232,16 @@ pub struct FormatSpec {
precision: Option<usize>, precision: Option<usize>,
// Ex) `f` in `'{:+f}'` // Ex) `f` in `'{:+f}'`
format_type: Option<FormatType>, format_type: Option<FormatType>,
// Ex) `x` and `y` in `'{:*{x},{y}b}'`
replacements: Vec<FormatPart>,
}
#[derive(Copy, Clone, Debug, PartialEq, Default)]
pub enum AllowPlaceholderNesting {
#[default]
Yes,
No,
AllowPlaceholderNesting,
} }
fn get_num_digits(text: &str) -> usize { fn get_num_digits(text: &str) -> usize {
@ -279,17 +318,44 @@ fn parse_precision(text: &str) -> Result<(Option<usize>, &str), FormatSpecError>
}) })
} }
/// Parses a format part within a format spec
fn parse_nested_placeholder<'a>(
parts: &mut Vec<FormatPart>,
text: &'a str,
) -> Result<&'a str, FormatSpecError> {
match FormatString::parse_spec(text, AllowPlaceholderNesting::No) {
// Not a nested replacement, OK
Err(FormatParseError::MissingStartBracket) => Ok(text),
Err(err) => Err(FormatSpecError::InvalidPlaceholder(err)),
Ok((format_part, text)) => {
parts.push(format_part);
Ok(text)
}
}
}
impl FormatSpec { impl FormatSpec {
pub fn parse(text: &str) -> Result<Self, FormatSpecError> { pub fn parse(text: &str) -> Result<Self, FormatSpecError> {
let mut replacements = vec![];
// get_integer in CPython // get_integer in CPython
let text = parse_nested_placeholder(&mut replacements, text)?;
let (conversion, text) = FormatConversion::parse(text); let (conversion, text) = FormatConversion::parse(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (mut fill, mut align, text) = parse_fill_and_align(text); let (mut fill, mut align, text) = parse_fill_and_align(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (sign, text) = FormatSign::parse(text); let (sign, text) = FormatSign::parse(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (alternate_form, text) = parse_alternate_form(text); let (alternate_form, text) = parse_alternate_form(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (zero, text) = parse_zero(text); let (zero, text) = parse_zero(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (width, text) = parse_number(text)?; let (width, text) = parse_number(text)?;
let text = parse_nested_placeholder(&mut replacements, text)?;
let (grouping_option, text) = FormatGrouping::parse(text); let (grouping_option, text) = FormatGrouping::parse(text);
let text = parse_nested_placeholder(&mut replacements, text)?;
let (precision, text) = parse_precision(text)?; let (precision, text) = parse_precision(text)?;
let text = parse_nested_placeholder(&mut replacements, text)?;
let (format_type, _text) = if text.is_empty() { let (format_type, _text) = if text.is_empty() {
(None, text) (None, text)
} else { } else {
@ -320,8 +386,13 @@ impl FormatSpec {
grouping_option, grouping_option,
precision, precision,
format_type, format_type,
replacements,
}) })
} }
pub fn replacements(&self) -> &[FormatPart] {
return self.replacements.as_slice();
}
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -330,6 +401,8 @@ pub enum FormatSpecError {
PrecisionTooBig, PrecisionTooBig,
InvalidFormatSpecifier, InvalidFormatSpecifier,
InvalidFormatType, InvalidFormatType,
InvalidPlaceholder(FormatParseError),
PlaceholderRecursionExceeded,
UnspecifiedFormat(char, char), UnspecifiedFormat(char, char),
UnknownFormatCode(char, &'static str), UnknownFormatCode(char, &'static str),
PrecisionNotAllowed, PrecisionNotAllowed,
@ -344,7 +417,7 @@ pub enum FormatParseError {
UnmatchedBracket, UnmatchedBracket,
MissingStartBracket, MissingStartBracket,
UnescapedStartBracketInLiteral, UnescapedStartBracketInLiteral,
InvalidFormatSpecifier, PlaceholderRecursionExceeded,
UnknownConversion, UnknownConversion,
EmptyAttribute, EmptyAttribute,
MissingRightBracket, MissingRightBracket,
@ -363,8 +436,8 @@ impl std::fmt::Display for FormatParseError {
Self::UnescapedStartBracketInLiteral => { Self::UnescapedStartBracketInLiteral => {
std::write!(fmt, "unescaped start bracket in literal") std::write!(fmt, "unescaped start bracket in literal")
} }
Self::InvalidFormatSpecifier => { Self::PlaceholderRecursionExceeded => {
std::write!(fmt, "invalid format specifier") std::write!(fmt, "multiply nested replacement not allowed")
} }
Self::UnknownConversion => { Self::UnknownConversion => {
std::write!(fmt, "unknown conversion") std::write!(fmt, "unknown conversion")
@ -564,20 +637,22 @@ impl FormatString {
}) })
} }
fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> { fn parse_spec(
text: &str,
allow_nesting: AllowPlaceholderNesting,
) -> Result<(FormatPart, &str), FormatParseError> {
let Some(text) = text.strip_prefix('{') else {
return Err(FormatParseError::MissingStartBracket);
};
let mut nested = false; let mut nested = false;
let mut end_bracket_pos = None;
let mut left = String::new(); let mut left = String::new();
// There may be one layer nesting brackets in spec
for (idx, c) in text.char_indices() { for (idx, c) in text.char_indices() {
if idx == 0 { if c == '{' {
if c != '{' { // There may be one layer nesting brackets in spec
return Err(FormatParseError::MissingStartBracket); if nested || allow_nesting == AllowPlaceholderNesting::No {
} return Err(FormatParseError::PlaceholderRecursionExceeded);
} else if c == '{' {
if nested {
return Err(FormatParseError::InvalidFormatSpecifier);
} }
nested = true; nested = true;
left.push(c); left.push(c);
@ -588,19 +663,13 @@ impl FormatString {
left.push(c); left.push(c);
continue; continue;
} }
end_bracket_pos = Some(idx); let (_, right) = text.split_at(idx + 1);
break; let format_part = FormatString::parse_part_in_brackets(&left)?;
} else { return Ok((format_part, right));
left.push(c);
} }
left.push(c);
} }
if let Some(pos) = end_bracket_pos { Err(FormatParseError::UnmatchedBracket)
let (_, right) = text.split_at(pos);
let format_part = FormatString::parse_part_in_brackets(&left)?;
Ok((format_part, &right[1..]))
} else {
Err(FormatParseError::UnmatchedBracket)
}
} }
} }
@ -619,7 +688,7 @@ impl<'a> FromTemplate<'a> for FormatString {
// Try to parse both literals and bracketed format parts until we // Try to parse both literals and bracketed format parts until we
// run out of text // run out of text
cur_text = FormatString::parse_literal(cur_text) cur_text = FormatString::parse_literal(cur_text)
.or_else(|_| FormatString::parse_spec(cur_text)) .or_else(|_| FormatString::parse_spec(cur_text, AllowPlaceholderNesting::Yes))
.map(|(part, new_text)| { .map(|(part, new_text)| {
parts.push(part); parts.push(part);
new_text new_text
@ -671,6 +740,7 @@ mod tests {
grouping_option: None, grouping_option: None,
precision: None, precision: None,
format_type: None, format_type: None,
replacements: vec![],
}); });
assert_eq!(FormatSpec::parse("33"), expected); assert_eq!(FormatSpec::parse("33"), expected);
} }
@ -687,10 +757,86 @@ mod tests {
grouping_option: None, grouping_option: None,
precision: None, precision: None,
format_type: None, format_type: None,
replacements: vec![],
}); });
assert_eq!(FormatSpec::parse("<>33"), expected); assert_eq!(FormatSpec::parse("<>33"), expected);
} }
#[test]
fn test_format_part() {
let expected = Ok(FormatSpec {
conversion: None,
fill: None,
align: None,
sign: None,
alternate_form: false,
width: None,
grouping_option: None,
precision: None,
format_type: None,
replacements: vec![FormatPart::Field {
field_name: "x".to_string(),
conversion_spec: None,
format_spec: String::new(),
}],
});
assert_eq!(FormatSpec::parse("{x}"), expected);
}
#[test]
fn test_format_parts() {
let expected = Ok(FormatSpec {
conversion: None,
fill: None,
align: None,
sign: None,
alternate_form: false,
width: None,
grouping_option: None,
precision: None,
format_type: None,
replacements: vec![
FormatPart::Field {
field_name: "x".to_string(),
conversion_spec: None,
format_spec: String::new(),
},
FormatPart::Field {
field_name: "y".to_string(),
conversion_spec: None,
format_spec: "<2".to_string(),
},
FormatPart::Field {
field_name: "z".to_string(),
conversion_spec: None,
format_spec: String::new(),
},
],
});
assert_eq!(FormatSpec::parse("{x}{y:<2}{z}"), expected);
}
#[test]
fn test_format_part_with_others() {
let expected = Ok(FormatSpec {
conversion: None,
fill: None,
align: Some(FormatAlign::Left),
sign: None,
alternate_form: false,
width: Some(20),
grouping_option: None,
precision: None,
format_type: Some(FormatType::Binary),
replacements: vec![FormatPart::Field {
field_name: "x".to_string(),
conversion_spec: None,
format_spec: String::new(),
}],
});
assert_eq!(FormatSpec::parse("<{x}20b"), expected);
}
#[test] #[test]
fn test_all() { fn test_all() {
let expected = Ok(FormatSpec { let expected = Ok(FormatSpec {
@ -703,6 +849,7 @@ mod tests {
grouping_option: Some(FormatGrouping::Comma), grouping_option: Some(FormatGrouping::Comma),
precision: Some(11), precision: Some(11),
format_type: Some(FormatType::Binary), format_type: Some(FormatType::Binary),
replacements: vec![],
}); });
assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected); assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected);
} }
@ -729,6 +876,22 @@ mod tests {
assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected); assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected);
} }
#[test]
fn test_format_parse_nested_replacement() {
let expected = Ok(FormatString {
format_parts: vec![
FormatPart::Literal("abcd".to_owned()),
FormatPart::Field {
field_name: "1".to_owned(),
conversion_spec: None,
format_spec: "{a}".to_owned(),
},
],
});
assert_eq!(FormatString::from_str("abcd{1:{a}}"), expected);
}
#[test] #[test]
fn test_format_parse_multi_byte_char() { fn test_format_parse_multi_byte_char() {
assert!(FormatString::from_str("{a:%ЫйЯЧ}").is_ok()); assert!(FormatString::from_str("{a:%ЫйЯЧ}").is_ok());
@ -785,6 +948,32 @@ mod tests {
FormatSpec::parse("o!"), FormatSpec::parse("o!"),
Err(FormatSpecError::InvalidFormatSpecifier) Err(FormatSpecError::InvalidFormatSpecifier)
); );
assert_eq!(
FormatSpec::parse("{"),
Err(FormatSpecError::InvalidPlaceholder(
FormatParseError::UnmatchedBracket
))
);
assert_eq!(
FormatSpec::parse("{x"),
Err(FormatSpecError::InvalidPlaceholder(
FormatParseError::UnmatchedBracket
))
);
assert_eq!(
FormatSpec::parse("}"),
Err(FormatSpecError::InvalidFormatType)
);
assert_eq!(
FormatSpec::parse("{}}"),
Err(FormatSpecError::InvalidFormatType)
);
assert_eq!(
FormatSpec::parse("{{x}}"),
Err(FormatSpecError::InvalidPlaceholder(
FormatParseError::PlaceholderRecursionExceeded
))
);
assert_eq!( assert_eq!(
FormatSpec::parse("d "), FormatSpec::parse("d "),
Err(FormatSpecError::InvalidFormatSpecifier) Err(FormatSpecError::InvalidFormatSpecifier)