Avoid duplicating backslashes in sysconfig parser (#10063)

## Summary

We had a bug in our handling of escape sequences that caused us to
duplicate backslashes. If you installed repeatedly, we'd keep doubling
them, leading to an exponential blowup.

Closes #10060.
This commit is contained in:
Charlie Marsh 2024-12-20 13:52:42 -05:00 committed by GitHub
parent 2ca5c2ba70
commit f3c5b63095
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -166,18 +166,20 @@ fn parse_string(cursor: &mut Cursor, quote: char) -> Result<String, Error> {
};
match c {
'\\' => {
// Handle escaped quotes.
if cursor.first() == quote {
// Consume the backslash.
cursor.bump();
result.push(quote);
continue;
}
// Keep the backslash and following character.
result.push('\\');
result.push(cursor.first());
cursor.bump();
// Treat the next character as a literal.
//
// See: https://github.com/astral-sh/ruff/blob/d47fba1e4aeeb18085900dfbbcd187e90d536913/crates/ruff_python_parser/src/string.rs#L194
let Some(c) = cursor.bump() else {
return Err(Error::UnexpectedEof);
};
result.push(match c {
'\\' => '\\',
'\'' => '\'',
'\"' => '"',
_ => {
return Err(Error::UnrecognizedEscape(c));
}
});
}
// Consume closing quote.
@ -255,6 +257,8 @@ pub enum Error {
UnexpectedCharacter(char),
#[error("Unexpected end of file")]
UnexpectedEof,
#[error("Unrecognized escape sequence: {0}")]
UnrecognizedEscape(char),
#[error("Failed to parse integer")]
ParseInt(#[from] std::num::ParseIntError),
#[error("`_sysconfigdata_` is missing a header comment")]
@ -293,6 +297,64 @@ mod tests {
"###);
}
/// Parses values that contain escaped quotes (`\"`) and escaped backslashes
/// (`\\`), then checks that pretty-printing reproduces every escape sequence
/// exactly as it was written (the only difference in the expected output is
/// the dropped trailing comma after the last entry).
#[test]
fn test_parse_backslash() {
    // The raw string is kept flush-left so its contents stay untouched.
    let source = indoc::indoc!(
        r#"
# system configuration generated and used by the sysconfig module
build_time_vars = {
"key1": "value1\"value2\"value3",
"key2": "value1\\value2\\value3",
"key3": "value1\\\"value2\\\"value3",
"key4": "value1\\\\value2\\\\value3",
}
"#
    );

    let parsed: SysconfigData = source.parse().expect("Parsing failed");
    let rendered = parsed.to_string_pretty().unwrap();

    insta::assert_snapshot!(rendered, @r###"
# system configuration generated and used by the sysconfig module
build_time_vars = {
"key1": "value1\"value2\"value3",
"key2": "value1\\value2\\value3",
"key3": "value1\\\"value2\\\"value3",
"key4": "value1\\\\value2\\\\value3"
}
"###);
}
/// A value whose final quote is preceded by a backslash (`...value3\"`) must be
/// rejected: the test expects parsing to fail with `Error::UnexpectedEof`.
#[test]
fn test_parse_trailing_backslash() {
    // The raw string is kept flush-left so its contents stay untouched.
    let source = indoc::indoc!(
        r#"
# system configuration generated and used by the sysconfig module
build_time_vars = {
"key1": "value1\\value2\\value3\",
}
"#
    );

    let outcome = source.parse::<SysconfigData>();
    assert!(matches!(outcome, Err(Error::UnexpectedEof)));
}
/// A backslash followed by a character other than a backslash or quote (here
/// `\v`) must be rejected: the test expects `Error::UnrecognizedEscape`
/// carrying the offending character.
#[test]
fn test_parse_unrecognized_escape() {
    // The raw string is kept flush-left so its contents stay untouched.
    let source = indoc::indoc!(
        r#"
# system configuration generated and used by the sysconfig module
build_time_vars = {
"key1": "value1\value2",
}
"#
    );

    let outcome = source.parse::<SysconfigData>();
    assert!(matches!(outcome, Err(Error::UnrecognizedEscape('v'))));
}
#[test]
fn test_parse_trailing_comma() {
let input = indoc::indoc!(