feat: report line and column on requirements parser errors (#2100)

## Summary Closes #2012 This changes `RequirementsTxtParserError::Parser` to take a line, column instead of cursor location to improve reporting of parser errors. A new function was added to compute the line and column based on the content and cursor location when a parser error occurs for simplicity. Given `uv pip compile .\requirements.txt` of below ``` numpy>=1,<2 --borken tqdm ``` Before: ``` error: Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement in `.\requirements.txt` at position 14 ``` After: ``` error: Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement in `.\requirements.txt` at position 2:3 ``` Open Question: Do we want to support `line:column` for other types of errors? I didn't look dig other potential error types where this might be desired. ## Test Plan New test was added to `requirements-txt` crate with this example.
2025-09-29 21:44:51 +00:00 · 2024-03-01 16:50:11 -05:00 · 2024-03-01 16:50:11 -05:00 · c7c3affee0
commit c7c3affee0
parent 0233a5771d
1 changed files with 146 additions and 28 deletions
--- a/crates/requirements-txt/src/lib.rs
+++ b/crates/requirements-txt/src/lib.rs
@ -412,9 +412,11 @@ impl RequirementsTxt {
                }
                RequirementsTxtStatement::IndexUrl(url) => {
                    if data.index_url.is_some() {
                        let (line, column) = calculate_row_column(content, s.cursor());
                        return Err(RequirementsTxtParserError::Parser {
                            message: "Multiple `--index-url` values provided".to_string(),
-                            location: s.cursor(),
+                            line,
                            column,
                        });
                    }
                    data.index_url = Some(url);
@ -453,36 +455,36 @@ fn parse_entry(
    eat_wrappable_whitespace(s);
    while s.at(['\n', '\r', '#']) {
        // skip comments
-        eat_trailing_line(s)?;
+        eat_trailing_line(content, s)?;
        eat_wrappable_whitespace(s);
    }
    let start = s.cursor();
    Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") {
-        let requirements_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?;
+        let requirements_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
        let end = s.cursor();
-        eat_trailing_line(s)?;
+        eat_trailing_line(content, s)?;
        RequirementsTxtStatement::Requirements {
            filename: requirements_file.to_string(),
            start,
            end,
        }
    } else if s.eat_if("-c") || s.eat_if("--constraint") {
-        let constraints_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?;
+        let constraints_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?;
        let end = s.cursor();
-        eat_trailing_line(s)?;
+        eat_trailing_line(content, s)?;
        RequirementsTxtStatement::Constraint {
            filename: constraints_file.to_string(),
            start,
            end,
        }
    } else if s.eat_if("-e") || s.eat_if("--editable") {
-        let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
+        let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
        let editable_requirement = EditableRequirement::parse(path_or_url, working_dir)
            .map_err(|err| err.with_offset(start))?;
        RequirementsTxtStatement::EditableRequirement(editable_requirement)
    } else if s.eat_if("-i") || s.eat_if("--index-url") {
-        let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
+        let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
        let url = VerbatimUrl::parse(given)
            .map(|url| url.with_given(given.to_owned()))
            .map_err(|err| RequirementsTxtParserError::Url {
@ -493,7 +495,7 @@ fn parse_entry(
            })?;
        RequirementsTxtStatement::IndexUrl(url)
    } else if s.eat_if("--extra-index-url") {
-        let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
+        let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
        let url = VerbatimUrl::parse(given)
            .map(|url| url.with_given(given.to_owned()))
            .map_err(|err| RequirementsTxtParserError::Url {
@ -506,7 +508,7 @@ fn parse_entry(
    } else if s.eat_if("--no-index") {
        RequirementsTxtStatement::NoIndex
    } else if s.eat_if("--find-links") || s.eat_if("-f") {
-        let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?;
+        let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?;
        let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| {
            RequirementsTxtParserError::Url {
                source: err,
@ -524,11 +526,13 @@ fn parse_entry(
            editable: false,
        })
    } else if let Some(char) = s.peek() {
        let (line, column) = calculate_row_column(content, s.cursor());
        return Err(RequirementsTxtParserError::Parser {
            message: format!(
                "Unexpected '{char}', expected '-c', '-e', '-r' or the start of a requirement"
            ),
-            location: s.cursor(),
+            line,
            column,
        });
    } else {
        // EOF
@ -549,7 +553,7 @@ fn eat_wrappable_whitespace<'a>(s: &mut Scanner<'a>) -> &'a str {
 }
 /// Eats the end of line or a potential trailing comma
-fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError> {
+fn eat_trailing_line(content: &str, s: &mut Scanner) -> Result<(), RequirementsTxtParserError> {
    s.eat_while([' ', '\t']);
    match s.eat() {
        None | Some('\n') => {} // End of file or end of line, nothing to do
@ -563,9 +567,11 @@ fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError>
            }
        }
        Some(other) => {
            let (line, column) = calculate_row_column(content, s.cursor());
            return Err(RequirementsTxtParserError::Parser {
                message: format!("Expected comment or end-of-line, found '{other}'"),
-                location: s.cursor(),
+                line,
                column,
            });
        }
    }
@ -669,8 +675,8 @@ fn parse_requirement_and_hashes(
            }
        })?;
    let hashes = if has_hashes {
-        let hashes = parse_hashes(s)?;
+        let hashes = parse_hashes(content, s)?;
-        eat_trailing_line(s)?;
+        eat_trailing_line(content, s)?;
        hashes
    } else {
        Vec::new()
@ -679,25 +685,27 @@ fn parse_requirement_and_hashes(
 }
 /// Parse `--hash=... --hash ...` after a requirement
-fn parse_hashes(s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserError> {
+fn parse_hashes(content: &str, s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserError> {
    let mut hashes = Vec::new();
    if s.eat_while("--hash").is_empty() {
        let (line, column) = calculate_row_column(content, s.cursor());
        return Err(RequirementsTxtParserError::Parser {
            message: format!(
                "Expected '--hash', found '{:?}'",
                s.eat_while(|c: char| !c.is_whitespace())
            ),
-            location: s.cursor(),
+            line,
            column,
        });
    }
-    let hash = parse_value(s, |c: char| !c.is_whitespace())?;
+    let hash = parse_value(content, s, |c: char| !c.is_whitespace())?;
    hashes.push(hash.to_string());
    loop {
        eat_wrappable_whitespace(s);
        if !s.eat_if("--hash") {
            break;
        }
-        let hash = parse_value(s, |c: char| !c.is_whitespace())?;
+        let hash = parse_value(content, s, |c: char| !c.is_whitespace())?;
        hashes.push(hash.to_string());
    }
    Ok(hashes)
@ -705,6 +713,7 @@ fn parse_hashes(s: &mut Scanner) -> Result<Vec<String>, RequirementsTxtParserErr
 /// In `-<key>=<value>` or `-<key> value`, this parses the part after the key
 fn parse_value<'a, T>(
    content: &str,
    s: &mut Scanner<'a>,
    while_pattern: impl Pattern<T>,
 ) -> Result<&'a str, RequirementsTxtParserError> {
@ -716,9 +725,11 @@ fn parse_value<'a, T>(
        s.eat_whitespace();
        Ok(s.eat_while(while_pattern).trim_end())
    } else {
        let (line, column) = calculate_row_column(content, s.cursor());
        Err(RequirementsTxtParserError::Parser {
            message: format!("Expected '=' or whitespace, found {:?}", s.peek()),
-            location: s.cursor(),
+            line,
            column,
        })
    }
 }
@ -746,7 +757,8 @@ pub enum RequirementsTxtParserError {
    MissingEditablePrefix(String),
    Parser {
        message: String,
-        location: usize,
+        line: usize,
        column: usize,
    },
    UnsupportedRequirement {
        source: Pep508Error,
@ -786,9 +798,14 @@ impl RequirementsTxtParserError {
            Self::UnsupportedUrl(url) => Self::UnsupportedUrl(url),
            Self::MissingRequirementPrefix(given) => Self::MissingRequirementPrefix(given),
            Self::MissingEditablePrefix(given) => Self::MissingEditablePrefix(given),
-            Self::Parser { message, location } => Self::Parser {
+            Self::Parser {
                message,
-                location: location + offset,
+                line,
                column,
            } => Self::Parser {
                message,
                line,
                column,
            },
            Self::UnsupportedRequirement { source, start, end } => Self::UnsupportedRequirement {
                source,
@ -831,8 +848,12 @@ impl Display for RequirementsTxtParserError {
                    "Requirement `{given}` looks like a directory but was passed as a package name. Did you mean `-e {given}`?"
                )
            }
-            Self::Parser { message, location } => {
+            Self::Parser {
-                write!(f, "{message} at position {location}")
+                message,
                line,
                column,
            } => {
                write!(f, "{message} at {line}:{column}")
            }
            Self::UnsupportedRequirement { start, end, .. } => {
                write!(f, "Unsupported requirement in position {start} to {end}")
@ -903,10 +924,14 @@ impl Display for RequirementsTxtFileError {
                    self.file.simplified_display(),
                )
            }
-            RequirementsTxtParserError::Parser { message, location } => {
+            RequirementsTxtParserError::Parser {
                message,
                line,
                column,
            } => {
                write!(
                    f,
-                    "{message} in `{}` at position {location}",
+                    "{message} at {}:{line}:{column}",
                    self.file.simplified_display(),
                )
            }
@ -947,6 +972,46 @@ impl From<io::Error> for RequirementsTxtParserError {
    }
 }
 /// Calculates the column and line offset of a given cursor based on the
 /// number of Unicode codepoints.
 fn calculate_row_column(content: &str, position: usize) -> (usize, usize) {
    let mut line = 1;
    let mut column = 1;
    let mut chars = content.char_indices().peekable();
    while let Some((index, char)) = chars.next() {
        if index >= position {
            break;
        }
        match char {
            '\r' => {
                // If the next character is a newline, skip it.
                if chars
                    .peek()
                    .map_or(false, |&(_, next_char)| next_char == '\n')
                {
                    chars.next();
                }
                // Reset.
                line += 1;
                column = 1;
            }
            '\n' => {
                //
                line += 1;
                column = 1;
            }
            // Increment column by Unicode codepoint. We don't use visual width
            // (e.g., `UnicodeWidthChar::width(char).unwrap_or(0)`), since that's
            // not what editors typically count.
            _ => column += 1,
        }
    }
    (line, column)
 }
 #[cfg(test)]
 mod test {
    use std::path::{Path, PathBuf};
@ -958,9 +1023,10 @@ mod test {
    use itertools::Itertools;
    use tempfile::tempdir;
    use test_case::test_case;
    use unscanny::Scanner;
    use uv_fs::Simplified;
-    use crate::{EditableRequirement, RequirementsTxt};
+    use crate::{calculate_row_column, EditableRequirement, RequirementsTxt};
    fn workspace_test_data_dir() -> PathBuf {
        PathBuf::from("./test-data")
@ -1279,4 +1345,56 @@ mod test {
            Some(("../editable[", "[dev]"))
        );
    }
    #[test]
    fn parser_error_line_and_column() -> Result<()> {
        let temp_dir = assert_fs::TempDir::new()?;
        let requirements_txt = temp_dir.child("requirements.txt");
        requirements_txt.write_str(indoc! {"
            numpy>=1,<2
              --borken
            tqdm
        "})?;
        let error = RequirementsTxt::parse(requirements_txt.path(), temp_dir.path()).unwrap_err();
        let errors = anyhow::Error::new(error).chain().join("\n");
        let requirement_txt =
            regex::escape(&requirements_txt.path().simplified_display().to_string());
        let filters = vec![
            (requirement_txt.as_str(), "<REQUIREMENTS_TXT>"),
            (r"\\", "/"),
        ];
        insta::with_settings!({
            filters => filters
        }, {
            insta::assert_display_snapshot!(errors, @r###"
            Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement at <REQUIREMENTS_TXT>:2:3
            "###);
        });
        Ok(())
    }
    #[test_case("numpy>=1,<2\n  @-borken\ntqdm", "2:4"; "ASCII Character with LF")]
    #[test_case("numpy>=1,<2\r\n  #-borken\ntqdm", "2:4"; "ASCII Character with CRLF")]
    #[test_case("numpy>=1,<2\n  \n-borken\ntqdm", "3:1"; "ASCII Character LF then LF")]
    #[test_case("numpy>=1,<2\n  \r-borken\ntqdm", "3:1"; "ASCII Character LF then CR but no LF")]
    #[test_case("numpy>=1,<2\n  \r\n-borken\ntqdm", "3:1"; "ASCII Character LF then CRLF")]
    #[test_case("numpy>=1,<2\n  🚀-borken\ntqdm", "2:4"; "Emoji (Wide) Character")]
    #[test_case("numpy>=1,<2\n  中-borken\ntqdm", "2:4"; "Fullwidth character")]
    #[test_case("numpy>=1,<2\n  e\u{0301}-borken\ntqdm", "2:5"; "Two codepoints")]
    #[test_case("numpy>=1,<2\n  a\u{0300}\u{0316}-borken\ntqdm", "2:6"; "Three codepoints")]
    fn test_calculate_line_column_pair(input: &str, expected: &str) {
        let mut s = Scanner::new(input);
        // Place cursor right after the character we want to test
        s.eat_until('-');
        // Compute line/column
        let (line, column) = calculate_row_column(input, s.cursor());
        let line_column = format!("{line}:{column}");
        // Assert line and columns are expected
        assert_eq!(line_column, expected, "Issues with input: {input}");
    }
 }