feat: report line and column on requirements parser errors (#2100)

## Summary

Closes #2012

This changes `RequirementsTxtParserError::Parser` to take a line, column
instead of cursor location to improve reporting of parser errors. A new
function was added to compute the line and column based on the content
and cursor location when a parser error occurs for simplicity.

Given `uv pip compile .\requirements.txt` of below
```
numpy>=1,<2
  --borken
tqdm
```

Before:

``` 
error: Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement in `.\requirements.txt` at position 14
```

After:

```
error: Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement in `.\requirements.txt` at position 2:3
```

Open Question: Do we want to support `line:column` for other types of
errors? I didn't look dig other potential error types where this might
be desired.

## Test Plan

New test was added to `requirements-txt` crate with this example.
This commit is contained in:
samypr100 2024-03-01 16:50:11 -05:00 committed by GitHub
parent 0233a5771d
commit c7c3affee0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -412,9 +412,11 @@ impl RequirementsTxt {
} }
RequirementsTxtStatement::IndexUrl(url) => { RequirementsTxtStatement::IndexUrl(url) => {
if data.index_url.is_some() { if data.index_url.is_some() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser { return Err(RequirementsTxtParserError::Parser {
message: "Multiple `--index-url` values provided".to_string(), message: "Multiple `--index-url` values provided".to_string(),
location: s.cursor(), line,
column,
}); });
} }
data.index_url = Some(url); data.index_url = Some(url);
@ -453,36 +455,36 @@ fn parse_entry(
eat_wrappable_whitespace(s); eat_wrappable_whitespace(s);
while s.at(['\n', '\r', '#']) { while s.at(['\n', '\r', '#']) {
// skip comments // skip comments
eat_trailing_line(s)?; eat_trailing_line(content, s)?;
eat_wrappable_whitespace(s); eat_wrappable_whitespace(s);
} }
let start = s.cursor(); let start = s.cursor();
Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") { Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") {
let requirements_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?; let requirements_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let end = s.cursor(); let end = s.cursor();
eat_trailing_line(s)?; eat_trailing_line(content, s)?;
RequirementsTxtStatement::Requirements { RequirementsTxtStatement::Requirements {
filename: requirements_file.to_string(), filename: requirements_file.to_string(),
start, start,
end, end,
} }
} else if s.eat_if("-c") || s.eat_if("--constraint") { } else if s.eat_if("-c") || s.eat_if("--constraint") {
let constraints_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?; let constraints_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
let end = s.cursor(); let end = s.cursor();
eat_trailing_line(s)?; eat_trailing_line(content, s)?;
RequirementsTxtStatement::Constraint { RequirementsTxtStatement::Constraint {
filename: constraints_file.to_string(), filename: constraints_file.to_string(),
start, start,
end, end,
} }
} else if s.eat_if("-e") || s.eat_if("--editable") { } else if s.eat_if("-e") || s.eat_if("--editable") {
let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let editable_requirement = EditableRequirement::parse(path_or_url, working_dir) let editable_requirement = EditableRequirement::parse(path_or_url, working_dir)
.map_err(|err| err.with_offset(start))?; .map_err(|err| err.with_offset(start))?;
RequirementsTxtStatement::EditableRequirement(editable_requirement) RequirementsTxtStatement::EditableRequirement(editable_requirement)
} else if s.eat_if("-i") || s.eat_if("--index-url") { } else if s.eat_if("-i") || s.eat_if("--index-url") {
let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let url = VerbatimUrl::parse(given) let url = VerbatimUrl::parse(given)
.map(|url| url.with_given(given.to_owned())) .map(|url| url.with_given(given.to_owned()))
.map_err(|err| RequirementsTxtParserError::Url { .map_err(|err| RequirementsTxtParserError::Url {
@ -493,7 +495,7 @@ fn parse_entry(
})?; })?;
RequirementsTxtStatement::IndexUrl(url) RequirementsTxtStatement::IndexUrl(url)
} else if s.eat_if("--extra-index-url") { } else if s.eat_if("--extra-index-url") {
let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let url = VerbatimUrl::parse(given) let url = VerbatimUrl::parse(given)
.map(|url| url.with_given(given.to_owned())) .map(|url| url.with_given(given.to_owned()))
.map_err(|err| RequirementsTxtParserError::Url { .map_err(|err| RequirementsTxtParserError::Url {
@ -506,7 +508,7 @@ fn parse_entry(
} else if s.eat_if("--no-index") { } else if s.eat_if("--no-index") {
RequirementsTxtStatement::NoIndex RequirementsTxtStatement::NoIndex
} else if s.eat_if("--find-links") || s.eat_if("-f") { } else if s.eat_if("--find-links") || s.eat_if("-f") {
let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| { let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| {
RequirementsTxtParserError::Url { RequirementsTxtParserError::Url {
source: err, source: err,
@ -524,11 +526,13 @@ fn parse_entry(
editable: false, editable: false,
}) })
} else if let Some(char) = s.peek() { } else if let Some(char) = s.peek() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser { return Err(RequirementsTxtParserError::Parser {
message: format!( message: format!(
"Unexpected '{char}', expected '-c', '-e', '-r' or the start of a requirement" "Unexpected '{char}', expected '-c', '-e', '-r' or the start of a requirement"
), ),
location: s.cursor(), line,
column,
}); });
} else { } else {
// EOF // EOF
@ -549,7 +553,7 @@ fn eat_wrappable_whitespace<'a>(s: &mut Scanner<'a>) -> &'a str {
} }
/// Eats the end of line or a potential trailing comma /// Eats the end of line or a potential trailing comma
fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError> { fn eat_trailing_line(content: &str, s: &mut Scanner) -> Result<(), RequirementsTxtParserError> {
s.eat_while([' ', '\t']); s.eat_while([' ', '\t']);
match s.eat() { match s.eat() {
None | Some('\n') => {} // End of file or end of line, nothing to do None | Some('\n') => {} // End of file or end of line, nothing to do
@ -563,9 +567,11 @@ fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError>
} }
} }
Some(other) => { Some(other) => {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser { return Err(RequirementsTxtParserError::Parser {
message: format!("Expected comment or end-of-line, found '{other}'"), message: format!("Expected comment or end-of-line, found '{other}'"),
location: s.cursor(), line,
column,
}); });
} }
} }
@ -669,8 +675,8 @@ fn parse_requirement_and_hashes(
} }
})?; })?;
let hashes = if has_hashes { let hashes = if has_hashes {
let hashes = parse_hashes(s)?; let hashes = parse_hashes(content, s)?;
eat_trailing_line(s)?; eat_trailing_line(content, s)?;
hashes hashes
} else { } else {
Vec::new() Vec::new()
@ -679,25 +685,27 @@ fn parse_requirement_and_hashes(
} }
/// Parse `--hash=... --hash ...` after a requirement /// Parse `--hash=... --hash ...` after a requirement
fn parse_hashes(s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserError> { fn parse_hashes(content: &str, s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserError> {
let mut hashes = Vec::new(); let mut hashes = Vec::new();
if s.eat_while("--hash").is_empty() { if s.eat_while("--hash").is_empty() {
let (line, column) = calculate_row_column(content, s.cursor());
return Err(RequirementsTxtParserError::Parser { return Err(RequirementsTxtParserError::Parser {
message: format!( message: format!(
"Expected '--hash', found '{:?}'", "Expected '--hash', found '{:?}'",
s.eat_while(|c: char| !c.is_whitespace()) s.eat_while(|c: char| !c.is_whitespace())
), ),
location: s.cursor(), line,
column,
}); });
} }
let hash = parse_value(s, |c: char| !c.is_whitespace())?; let hash = parse_value(content, s, |c: char| !c.is_whitespace())?;
hashes.push(hash.to_string()); hashes.push(hash.to_string());
loop { loop {
eat_wrappable_whitespace(s); eat_wrappable_whitespace(s);
if !s.eat_if("--hash") { if !s.eat_if("--hash") {
break; break;
} }
let hash = parse_value(s, |c: char| !c.is_whitespace())?; let hash = parse_value(content, s, |c: char| !c.is_whitespace())?;
hashes.push(hash.to_string()); hashes.push(hash.to_string());
} }
Ok(hashes) Ok(hashes)
@ -705,6 +713,7 @@ fn parse_hashes(s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserErr
/// In `-<key>=<value>` or `-<key> value`, this parses the part after the key /// In `-<key>=<value>` or `-<key> value`, this parses the part after the key
fn parse_value<'a, T>( fn parse_value<'a, T>(
content: &str,
s: &mut Scanner<'a>, s: &mut Scanner<'a>,
while_pattern: impl Pattern<T>, while_pattern: impl Pattern<T>,
) -> Result<&'a str, RequirementsTxtParserError> { ) -> Result<&'a str, RequirementsTxtParserError> {
@ -716,9 +725,11 @@ fn parse_value<'a, T>(
s.eat_whitespace(); s.eat_whitespace();
Ok(s.eat_while(while_pattern).trim_end()) Ok(s.eat_while(while_pattern).trim_end())
} else { } else {
let (line, column) = calculate_row_column(content, s.cursor());
Err(RequirementsTxtParserError::Parser { Err(RequirementsTxtParserError::Parser {
message: format!("Expected '=' or whitespace, found {:?}", s.peek()), message: format!("Expected '=' or whitespace, found {:?}", s.peek()),
location: s.cursor(), line,
column,
}) })
} }
} }
@ -746,7 +757,8 @@ pub enum RequirementsTxtParserError {
MissingEditablePrefix(String), MissingEditablePrefix(String),
Parser { Parser {
message: String, message: String,
location: usize, line: usize,
column: usize,
}, },
UnsupportedRequirement { UnsupportedRequirement {
source: Pep508Error, source: Pep508Error,
@ -786,9 +798,14 @@ impl RequirementsTxtParserError {
Self::UnsupportedUrl(url) => Self::UnsupportedUrl(url), Self::UnsupportedUrl(url) => Self::UnsupportedUrl(url),
Self::MissingRequirementPrefix(given) => Self::MissingRequirementPrefix(given), Self::MissingRequirementPrefix(given) => Self::MissingRequirementPrefix(given),
Self::MissingEditablePrefix(given) => Self::MissingEditablePrefix(given), Self::MissingEditablePrefix(given) => Self::MissingEditablePrefix(given),
Self::Parser { message, location } => Self::Parser { Self::Parser {
message, message,
location: location + offset, line,
column,
} => Self::Parser {
message,
line,
column,
}, },
Self::UnsupportedRequirement { source, start, end } => Self::UnsupportedRequirement { Self::UnsupportedRequirement { source, start, end } => Self::UnsupportedRequirement {
source, source,
@ -831,8 +848,12 @@ impl Display for RequirementsTxtParserError {
"Requirement `{given}` looks like a directory but was passed as a package name. Did you mean `-e {given}`?" "Requirement `{given}` looks like a directory but was passed as a package name. Did you mean `-e {given}`?"
) )
} }
Self::Parser { message, location } => { Self::Parser {
write!(f, "{message} at position {location}") message,
line,
column,
} => {
write!(f, "{message} at {line}:{column}")
} }
Self::UnsupportedRequirement { start, end, .. } => { Self::UnsupportedRequirement { start, end, .. } => {
write!(f, "Unsupported requirement in position {start} to {end}") write!(f, "Unsupported requirement in position {start} to {end}")
@ -903,10 +924,14 @@ impl Display for RequirementsTxtFileError {
self.file.simplified_display(), self.file.simplified_display(),
) )
} }
RequirementsTxtParserError::Parser { message, location } => { RequirementsTxtParserError::Parser {
message,
line,
column,
} => {
write!( write!(
f, f,
"{message} in `{}` at position {location}", "{message} at {}:{line}:{column}",
self.file.simplified_display(), self.file.simplified_display(),
) )
} }
@ -947,6 +972,46 @@ impl From<io::Error> for RequirementsTxtParserError {
} }
} }
/// Calculates the column and line offset of a given cursor based on the
/// number of Unicode codepoints.
fn calculate_row_column(content: &str, position: usize) -> (usize, usize) {
let mut line = 1;
let mut column = 1;
let mut chars = content.char_indices().peekable();
while let Some((index, char)) = chars.next() {
if index >= position {
break;
}
match char {
'\r' => {
// If the next character is a newline, skip it.
if chars
.peek()
.map_or(false, |&(_, next_char)| next_char == '\n')
{
chars.next();
}
// Reset.
line += 1;
column = 1;
}
'\n' => {
//
line += 1;
column = 1;
}
// Increment column by Unicode codepoint. We don't use visual width
// (e.g., `UnicodeWidthChar::width(char).unwrap_or(0)`), since that's
// not what editors typically count.
_ => column += 1,
}
}
(line, column)
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -958,9 +1023,10 @@ mod test {
use itertools::Itertools; use itertools::Itertools;
use tempfile::tempdir; use tempfile::tempdir;
use test_case::test_case; use test_case::test_case;
use unscanny::Scanner;
use uv_fs::Simplified; use uv_fs::Simplified;
use crate::{EditableRequirement, RequirementsTxt}; use crate::{calculate_row_column, EditableRequirement, RequirementsTxt};
fn workspace_test_data_dir() -> PathBuf { fn workspace_test_data_dir() -> PathBuf {
PathBuf::from("./test-data") PathBuf::from("./test-data")
@ -1279,4 +1345,56 @@ mod test {
Some(("../editable[", "[dev]")) Some(("../editable[", "[dev]"))
); );
} }
#[test]
fn parser_error_line_and_column() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
let requirements_txt = temp_dir.child("requirements.txt");
requirements_txt.write_str(indoc! {"
numpy>=1,<2
--borken
tqdm
"})?;
let error = RequirementsTxt::parse(requirements_txt.path(), temp_dir.path()).unwrap_err();
let errors = anyhow::Error::new(error).chain().join("\n");
let requirement_txt =
regex::escape(&requirements_txt.path().simplified_display().to_string());
let filters = vec![
(requirement_txt.as_str(), "<REQUIREMENTS_TXT>"),
(r"\\", "/"),
];
insta::with_settings!({
filters => filters
}, {
insta::assert_display_snapshot!(errors, @r###"
Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement at <REQUIREMENTS_TXT>:2:3
"###);
});
Ok(())
}
#[test_case("numpy>=1,<2\n @-borken\ntqdm", "2:4"; "ASCII Character with LF")]
#[test_case("numpy>=1,<2\r\n #-borken\ntqdm", "2:4"; "ASCII Character with CRLF")]
#[test_case("numpy>=1,<2\n \n-borken\ntqdm", "3:1"; "ASCII Character LF then LF")]
#[test_case("numpy>=1,<2\n \r-borken\ntqdm", "3:1"; "ASCII Character LF then CR but no LF")]
#[test_case("numpy>=1,<2\n \r\n-borken\ntqdm", "3:1"; "ASCII Character LF then CRLF")]
#[test_case("numpy>=1,<2\n 🚀-borken\ntqdm", "2:4"; "Emoji (Wide) Character")]
#[test_case("numpy>=1,<2\n 中-borken\ntqdm", "2:4"; "Fullwidth character")]
#[test_case("numpy>=1,<2\n e\u{0301}-borken\ntqdm", "2:5"; "Two codepoints")]
#[test_case("numpy>=1,<2\n a\u{0300}\u{0316}-borken\ntqdm", "2:6"; "Three codepoints")]
fn test_calculate_line_column_pair(input: &str, expected: &str) {
let mut s = Scanner::new(input);
// Place cursor right after the character we want to test
s.eat_until('-');
// Compute line/column
let (line, column) = calculate_row_column(input, s.cursor());
let line_column = format!("{line}:{column}");
// Assert line and columns are expected
assert_eq!(line_column, expected, "Issues with input: {input}");
}
} }