feat: report line and column on requirements parser errors (#2100)

## Summary

Closes #2012

This changes `RequirementsTxtParserError::Parser` to take a line, column
instead of cursor location to improve reporting of parser errors. A new
function was added to compute the line and column based on the content
and cursor location when a parser error occurs for simplicity.

Given `uv pip compile .\requirements.txt` of below
```
numpy>=1,<2
  --borken
tqdm
```

Before:

``` 
error: Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement in `.\requirements.txt` at position 14
```

After:

```
error: Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement in `.\requirements.txt` at position 2:3
```

Open Question: Do we want to support `line:column` for other types of
errors? I didn't look dig other potential error types where this might
be desired.

## Test Plan

New test was added to `requirements-txt` crate with this example.
This commit is contained in:
samypr100 2024-03-01 16:50:11 -05:00 committed by GitHub
parent 0233a5771d
commit c7c3affee0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -412,9 +412,11 @@ impl RequirementsTxt {
}
RequirementsTxtStatement::IndexUrl(url) => {
if data.index_url.is_some() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser {
message: "Multiple `--index-url` values provided".to_string(),
location: s.cursor(),
line,
column,
});
}
data.index_url = Some(url);
@ -453,36 +455,36 @@ fn parse_entry(
eat_wrappable_whitespace(s);
while s.at(['\n', '\r', '#']) {
// skip comments
eat_trailing_line(s)?;
eat_trailing_line(content, s)?;
eat_wrappable_whitespace(s);
}
let start = s.cursor();
Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") {
let requirements_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let requirements_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let end = s.cursor();
eat_trailing_line(s)?;
eat_trailing_line(content, s)?;
RequirementsTxtStatement::Requirements {
filename: requirements_file.to_string(),
start,
end,
}
} else if s.eat_if("-c") || s.eat_if("--constraint") {
let constraints_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let constraints_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let end = s.cursor();
eat_trailing_line(s)?;
eat_trailing_line(content, s)?;
RequirementsTxtStatement::Constraint {
filename: constraints_file.to_string(),
start,
end,
}
} else if s.eat_if("-e") || s.eat_if("--editable") {
let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let editable_requirement = EditableRequirement::parse(path_or_url, working_dir)
.map_err(|err| err.with_offset(start))?;
RequirementsTxtStatement::EditableRequirement(editable_requirement)
} else if s.eat_if("-i") || s.eat_if("--index-url") {
let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let url = VerbatimUrl::parse(given)
.map(|url| url.with_given(given.to_owned()))
.map_err(|err| RequirementsTxtParserError::Url {
@ -493,7 +495,7 @@ fn parse_entry(
})?;
RequirementsTxtStatement::IndexUrl(url)
} else if s.eat_if("--extra-index-url") {
let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let url = VerbatimUrl::parse(given)
.map(|url| url.with_given(given.to_owned()))
.map_err(|err| RequirementsTxtParserError::Url {
@ -506,7 +508,7 @@ fn parse_entry(
} else if s.eat_if("--no-index") {
RequirementsTxtStatement::NoIndex
} else if s.eat_if("--find-links") || s.eat_if("-f") {
let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| {
RequirementsTxtParserError::Url {
source: err,
@ -524,11 +526,13 @@ fn parse_entry(
editable: false,
})
} else if let Some(char) = s.peek() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser {
message: format!(
"Unexpected '{char}', expected '-c', '-e', '-r' or the start of a requirement"
),
location: s.cursor(),
line,
column,
});
} else {
// EOF
@ -549,7 +553,7 @@ fn eat_wrappable_whitespace<'a>(s: &mut Scanner<'a>) -> &'a str {
}
/// Eats the end of line or a potential trailing comma
fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError> {
fn eat_trailing_line(content: &str, s: &mut Scanner) -> Result<(), RequirementsTxtParserError> {
s.eat_while([' ', '\t']);
match s.eat() {
None | Some('\n') => {} // End of file or end of line, nothing to do
@ -563,9 +567,11 @@ fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError>
}
}
Some(other) => {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser {
message: format!("Expected comment or end-of-line, found '{other}'"),
location: s.cursor(),
line,
column,
});
}
}
@ -669,8 +675,8 @@ fn parse_requirement_and_hashes(
}
})?;
let hashes = if has_hashes {
let hashes = parse_hashes(s)?;
eat_trailing_line(s)?;
let hashes = parse_hashes(content, s)?;
eat_trailing_line(content, s)?;
hashes
} else {
Vec::new()
@ -679,25 +685,27 @@ fn parse_requirement_and_hashes(
}
/// Parse `--hash=... --hash ...` after a requirement
fn parse_hashes(s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserError> {
fn parse_hashes(content: &str, s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserError> {
let mut hashes = Vec::new();
if s.eat_while("--hash").is_empty() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser {
message: format!(
"Expected '--hash', found '{:?}'",
s.eat_while(|c: char| !c.is_whitespace())
),
location: s.cursor(),
line,
column,
});
}
let hash = parse_value(s, |c: char| !c.is_whitespace())?;
let hash = parse_value(content, s, |c: char| !c.is_whitespace())?;
hashes.push(hash.to_string());
loop {
eat_wrappable_whitespace(s);
if !s.eat_if("--hash") {
break;
}
let hash = parse_value(s, |c: char| !c.is_whitespace())?;
let hash = parse_value(content, s, |c: char| !c.is_whitespace())?;
hashes.push(hash.to_string());
}
Ok(hashes)
@ -705,6 +713,7 @@ fn parse_hashes(s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserErr
/// In `-<key>=<value>` or `-<key> value`, this parses the part after the key
fn parse_value<'a, T>(
content: &str,
s: &mut Scanner<'a>,
while_pattern: impl Pattern<T>,
) -> Result<&'a str, RequirementsTxtParserError> {
@ -716,9 +725,11 @@ fn parse_value<'a, T>(
s.eat_whitespace();
Ok(s.eat_while(while_pattern).trim_end())
} else {
let (line, column) = calculate_row_column(content, s.cursor());
Err(RequirementsTxtParserError::Parser {
message: format!("Expected '=' or whitespace, found {:?}", s.peek()),
location: s.cursor(),
line,
column,
})
}
}
@ -746,7 +757,8 @@ pub enum RequirementsTxtParserError {
MissingEditablePrefix(String),
Parser {
message: String,
location: usize,
line: usize,
column: usize,
},
UnsupportedRequirement {
source: Pep508Error,
@ -786,9 +798,14 @@ impl RequirementsTxtParserError {
Self::UnsupportedUrl(url) => Self::UnsupportedUrl(url),
Self::MissingRequirementPrefix(given) => Self::MissingRequirementPrefix(given),
Self::MissingEditablePrefix(given) => Self::MissingEditablePrefix(given),
Self::Parser { message, location } => Self::Parser {
Self::Parser {
message,
location: location + offset,
line,
column,
} => Self::Parser {
message,
line,
column,
},
Self::UnsupportedRequirement { source, start, end } => Self::UnsupportedRequirement {
source,
@ -831,8 +848,12 @@ impl Display for RequirementsTxtParserError {
"Requirement `{given}` looks like a directory but was passed as a package name. Did you mean `-e {given}`?"
)
}
Self::Parser { message, location } => {
write!(f, "{message} at position {location}")
Self::Parser {
message,
line,
column,
} => {
write!(f, "{message} at {line}:{column}")
}
Self::UnsupportedRequirement { start, end, .. } => {
write!(f, "Unsupported requirement in position {start} to {end}")
@ -903,10 +924,14 @@ impl Display for RequirementsTxtFileError {
self.file.simplified_display(),
)
}
RequirementsTxtParserError::Parser { message, location } => {
RequirementsTxtParserError::Parser {
message,
line,
column,
} => {
write!(
f,
"{message} in `{}` at position {location}",
"{message} at {}:{line}:{column}",
self.file.simplified_display(),
)
}
@ -947,6 +972,46 @@ impl From<io::Error> for RequirementsTxtParserError {
}
}
/// Calculates the column and line offset of a given cursor based on the
/// number of Unicode codepoints.
fn calculate_row_column(content: &str, position: usize) -> (usize, usize) {
let mut line = 1;
let mut column = 1;
let mut chars = content.char_indices().peekable();
while let Some((index, char)) = chars.next() {
if index >= position {
break;
}
match char {
'\r' => {
// If the next character is a newline, skip it.
if chars
.peek()
.map_or(false, |&(_, next_char)| next_char == '\n')
{
chars.next();
}
// Reset.
line += 1;
column = 1;
}
'\n' => {
//
line += 1;
column = 1;
}
// Increment column by Unicode codepoint. We don't use visual width
// (e.g., `UnicodeWidthChar::width(char).unwrap_or(0)`), since that's
// not what editors typically count.
_ => column += 1,
}
}
(line, column)
}
#[cfg(test)]
mod test {
use std::path::{Path, PathBuf};
@ -958,9 +1023,10 @@ mod test {
use itertools::Itertools;
use tempfile::tempdir;
use test_case::test_case;
use unscanny::Scanner;
use uv_fs::Simplified;
use crate::{EditableRequirement, RequirementsTxt};
use crate::{calculate_row_column, EditableRequirement, RequirementsTxt};
fn workspace_test_data_dir() -> PathBuf {
PathBuf::from("./test-data")
@ -1279,4 +1345,56 @@ mod test {
Some(("../editable[", "[dev]"))
);
}
#[test]
fn parser_error_line_and_column() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.write_str(indoc! {"
numpy>=1,<2
--borken
tqdm
"})?;
let error = RequirementsTxt::parse(requirements_txt.path(), temp_dir.path()).unwrap_err();
let errors = anyhow::Error::new(error).chain().join("\n");
let requirement_txt =
regex::escape(&requirements_txt.path().simplified_display().to_string());
let filters = vec![
(requirement_txt.as_str(), "<REQUIREMENTS_TXT>"),
(r"\\", "/"),
];
insta::with_settings!({
filters => filters
}, {
insta::assert_display_snapshot!(errors, @r###"
Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement at <REQUIREMENTS_TXT>:2:3
"###);
});
Ok(())
}
#[test_case("numpy>=1,<2\n @-borken\ntqdm", "2:4"; "ASCII Character with LF")]
#[test_case("numpy>=1,<2\r\n #-borken\ntqdm", "2:4"; "ASCII Character with CRLF")]
#[test_case("numpy>=1,<2\n \n-borken\ntqdm", "3:1"; "ASCII Character LF then LF")]
#[test_case("numpy>=1,<2\n \r-borken\ntqdm", "3:1"; "ASCII Character LF then CR but no LF")]
#[test_case("numpy>=1,<2\n \r\n-borken\ntqdm", "3:1"; "ASCII Character LF then CRLF")]
#[test_case("numpy>=1,<2\n 🚀-borken\ntqdm", "2:4"; "Emoji (Wide) Character")]
#[test_case("numpy>=1,<2\n 中-borken\ntqdm", "2:4"; "Fullwidth character")]
#[test_case("numpy>=1,<2\n e\u{0301}-borken\ntqdm", "2:5"; "Two codepoints")]
#[test_case("numpy>=1,<2\n a\u{0300}\u{0316}-borken\ntqdm", "2:6"; "Three codepoints")]
fn test_calculate_line_column_pair(input: &str, expected: &str) {
let mut s = Scanner::new(input);
// Place cursor right after the character we want to test
s.eat_until('-');
// Compute line/column
let (line, column) = calculate_row_column(input, s.cursor());
let line_column = format!("{line}:{column}");
// Assert line and columns are expected
assert_eq!(line_column, expected, "Issues with input: {input}");
}
}