mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-04 02:48:17 +00:00
Avoid lookaheads in sysconfig
parser (#9879)
## Summary Based on some review feedback from https://github.com/astral-sh/uv/pull/9857.
This commit is contained in:
parent
08ea79cc33
commit
53dfe0de52
1 changed files with 114 additions and 46 deletions
|
@ -4,7 +4,7 @@ use std::str::FromStr;
|
|||
use serde::Serialize;
|
||||
use serde_json::ser::PrettyFormatter;
|
||||
|
||||
use crate::sysconfig::cursor::{Cursor, EOF_CHAR};
|
||||
use crate::sysconfig::cursor::Cursor;
|
||||
|
||||
/// A value in the [`SysconfigData`] map.
|
||||
///
|
||||
|
@ -106,7 +106,11 @@ impl FromStr for SysconfigData {
|
|||
|
||||
let mut map = BTreeMap::new();
|
||||
loop {
|
||||
match cursor.first() {
|
||||
let Some(next) = cursor.bump() else {
|
||||
return Err(Error::UnexpectedEof);
|
||||
};
|
||||
|
||||
match next {
|
||||
'\'' | '"' => {
|
||||
// Parse key.
|
||||
let key = parse_string(&mut cursor)?;
|
||||
|
@ -136,13 +140,10 @@ impl FromStr for SysconfigData {
|
|||
}
|
||||
|
||||
// Skip whitespace.
|
||||
' ' | '\n' | '\r' | '\t' => {
|
||||
cursor.bump();
|
||||
}
|
||||
' ' | '\n' | '\r' | '\t' => {}
|
||||
|
||||
// When we see a closing brace, we're done.
|
||||
'}' => {
|
||||
cursor.bump();
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -155,77 +156,81 @@ impl FromStr for SysconfigData {
|
|||
}
|
||||
|
||||
/// Parse a Python string literal.
|
||||
///
|
||||
/// Expects the previous character to be the opening quote character.
|
||||
fn parse_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||
let quote = cursor.bump().expect("Expected opening quote");
|
||||
let quote = cursor.previous();
|
||||
assert!(quote == '\'' || quote == '"', "Invalid quote character");
|
||||
|
||||
let mut result = String::new();
|
||||
loop {
|
||||
if cursor.first() == EOF_CHAR {
|
||||
return Err(Error::UnexpectedCharacter(EOF_CHAR));
|
||||
}
|
||||
let Some(c) = cursor.bump() else {
|
||||
return Err(Error::UnexpectedEof);
|
||||
};
|
||||
match c {
|
||||
'\\' => {
|
||||
// Handle escaped quotes.
|
||||
if cursor.first() == quote {
|
||||
// Consume the backslash.
|
||||
cursor.bump();
|
||||
result.push(quote);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle escaped quotes.
|
||||
if cursor.first() == '\\' {
|
||||
// Consume the backslash.
|
||||
cursor.bump();
|
||||
if cursor.first() == quote {
|
||||
result.push(quote);
|
||||
// Keep the backslash and following character.
|
||||
result.push('\\');
|
||||
result.push(cursor.first());
|
||||
cursor.bump();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Keep the backslash and following character.
|
||||
result.push('\\');
|
||||
result.push(cursor.first());
|
||||
cursor.bump();
|
||||
continue;
|
||||
}
|
||||
// Consume closing quote.
|
||||
c if c == quote => {
|
||||
break;
|
||||
}
|
||||
|
||||
// Consume closing quote.
|
||||
if cursor.first() == quote {
|
||||
cursor.bump();
|
||||
break;
|
||||
c => {
|
||||
result.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
result.push(cursor.first());
|
||||
cursor.bump();
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Parse a Python string, which may be a concatenation of multiple string literals.
|
||||
///
|
||||
/// Expects the cursor to start at an opening quote character.
|
||||
fn parse_concatenated_string(cursor: &mut Cursor) -> Result<String, Error> {
|
||||
let mut result = String::new();
|
||||
loop {
|
||||
let c = cursor.first();
|
||||
if c == EOF_CHAR {
|
||||
break;
|
||||
let Some(c) = cursor.bump() else {
|
||||
return Err(Error::UnexpectedEof);
|
||||
};
|
||||
match c {
|
||||
'\'' | '"' => {
|
||||
// Parse a new string fragment and append it.
|
||||
result.push_str(&parse_string(cursor)?);
|
||||
}
|
||||
c if is_python_whitespace(c) => {
|
||||
// Skip whitespace between fragments
|
||||
}
|
||||
c => return Err(Error::UnexpectedCharacter(c)),
|
||||
}
|
||||
if c == '\'' || c == '"' {
|
||||
// Parse a new string fragment and append it.
|
||||
result.push_str(&parse_string(cursor)?);
|
||||
} else if is_python_whitespace(c) {
|
||||
// Skip whitespace between fragments
|
||||
cursor.bump();
|
||||
} else if c == ',' || c == '}' {
|
||||
// End of value.
|
||||
|
||||
// Lookahead to the end of the string.
|
||||
if matches!(cursor.first(), ',' | '}') {
|
||||
break;
|
||||
} else {
|
||||
return Err(Error::UnexpectedCharacter(c));
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Parse an integer literal.
|
||||
///
|
||||
/// Expects the cursor to start at the first digit of the integer.
|
||||
fn parse_int(cursor: &mut Cursor) -> Result<i32, std::num::ParseIntError> {
|
||||
let mut result = String::new();
|
||||
loop {
|
||||
let c = cursor.first();
|
||||
if c == EOF_CHAR {
|
||||
break;
|
||||
}
|
||||
if !c.is_ascii_digit() {
|
||||
break;
|
||||
}
|
||||
|
@ -251,6 +256,8 @@ pub enum Error {
|
|||
MissingOpenBrace,
|
||||
#[error("Unexpected character: {0}")]
|
||||
UnexpectedCharacter(char),
|
||||
#[error("Unexpected end of file")]
|
||||
UnexpectedEof,
|
||||
#[error("Failed to parse integer")]
|
||||
ParseInt(#[from] std::num::ParseIntError),
|
||||
#[error("`_sysconfigdata_` is missing a header comment")]
|
||||
|
@ -289,6 +296,32 @@ mod tests {
|
|||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_trailing_comma() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "value1",
|
||||
"key2": 42,
|
||||
"key3": "multi-part" " string",
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>().expect("Parsing failed");
|
||||
let snapshot = result.to_string_pretty().unwrap();
|
||||
|
||||
insta::assert_snapshot!(snapshot, @r###"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": "value1",
|
||||
"key2": 42,
|
||||
"key3": "multi-part string"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_integer_values() {
|
||||
let input = indoc::indoc!(
|
||||
|
@ -407,4 +440,39 @@ mod tests {
|
|||
"Expected parsing to fail due to unexpected character"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unexpected_eof() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": 123
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>();
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Expected parsing to fail due to unexpected character"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unexpected_comma() {
|
||||
let input = indoc::indoc!(
|
||||
r#"
|
||||
# system configuration generated and used by the sysconfig module
|
||||
build_time_vars = {
|
||||
"key1": 123,,
|
||||
}
|
||||
"#
|
||||
);
|
||||
|
||||
let result = input.parse::<SysconfigData>();
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Expected parsing to fail due to unexpected character"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue