diff --git a/crates/requirements-txt/src/lib.rs b/crates/requirements-txt/src/lib.rs index f28e2658e..0fc7ef9bb 100644 --- a/crates/requirements-txt/src/lib.rs +++ b/crates/requirements-txt/src/lib.rs @@ -412,9 +412,11 @@ impl RequirementsTxt { } RequirementsTxtStatement::IndexUrl(url) => { if data.index_url.is_some() { + let (line, column) = calculate_row_column(content, s.cursor()); return Err(RequirementsTxtParserError::Parser { message: "Multiple `--index-url` values provided".to_string(), - location: s.cursor(), + line, + column, }); } data.index_url = Some(url); @@ -453,36 +455,36 @@ fn parse_entry( eat_wrappable_whitespace(s); while s.at(['\n', '\r', '#']) { // skip comments - eat_trailing_line(s)?; + eat_trailing_line(content, s)?; eat_wrappable_whitespace(s); } let start = s.cursor(); Ok(Some(if s.eat_if("-r") || s.eat_if("--requirement") { - let requirements_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?; + let requirements_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?; let end = s.cursor(); - eat_trailing_line(s)?; + eat_trailing_line(content, s)?; RequirementsTxtStatement::Requirements { filename: requirements_file.to_string(), start, end, } } else if s.eat_if("-c") || s.eat_if("--constraint") { - let constraints_file = parse_value(s, |c: char| !['\n', '\r', '#'].contains(&c))?; + let constraints_file = parse_value(content, s, |c: char| !['\n', '\r', '#'].contains(&c))?; let end = s.cursor(); - eat_trailing_line(s)?; + eat_trailing_line(content, s)?; RequirementsTxtStatement::Constraint { filename: constraints_file.to_string(), start, end, } } else if s.eat_if("-e") || s.eat_if("--editable") { - let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; + let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?; let editable_requirement = EditableRequirement::parse(path_or_url, working_dir) .map_err(|err| err.with_offset(start))?; RequirementsTxtStatement::EditableRequirement(editable_requirement) } else if s.eat_if("-i") || s.eat_if("--index-url") { - let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; + let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?; let url = VerbatimUrl::parse(given) .map(|url| url.with_given(given.to_owned())) .map_err(|err| RequirementsTxtParserError::Url { @@ -493,7 +495,7 @@ fn parse_entry( })?; RequirementsTxtStatement::IndexUrl(url) } else if s.eat_if("--extra-index-url") { - let given = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; + let given = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?; let url = VerbatimUrl::parse(given) .map(|url| url.with_given(given.to_owned())) .map_err(|err| RequirementsTxtParserError::Url { @@ -506,7 +508,7 @@ fn parse_entry( } else if s.eat_if("--no-index") { RequirementsTxtStatement::NoIndex } else if s.eat_if("--find-links") || s.eat_if("-f") { - let path_or_url = parse_value(s, |c: char| !['\n', '\r'].contains(&c))?; + let path_or_url = parse_value(content, s, |c: char| !['\n', '\r'].contains(&c))?; let path_or_url = FindLink::parse(path_or_url, working_dir).map_err(|err| { RequirementsTxtParserError::Url { source: err, @@ -524,11 +526,13 @@ fn parse_entry( editable: false, }) } else if let Some(char) = s.peek() { + let (line, column) = calculate_row_column(content, s.cursor()); return Err(RequirementsTxtParserError::Parser { message: format!( "Unexpected '{char}', expected '-c', '-e', '-r' or the start of a requirement" ), - location: s.cursor(), + line, + column, }); } else { // EOF @@ -549,7 +553,7 @@ fn eat_wrappable_whitespace<'a>(s: &mut Scanner<'a>) -> &'a str { } /// Eats the end of line or a potential trailing comma -fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError> { +fn eat_trailing_line(content: &str, s: &mut Scanner) -> Result<(), RequirementsTxtParserError> { s.eat_while([' ', '\t']); match s.eat() { None | Some('\n') => {} // End of file or end of line, nothing to do @@ -563,9 +567,11 @@ fn eat_trailing_line(s: &mut Scanner) -> Result<(), RequirementsTxtParserError> } } Some(other) => { + let (line, column) = calculate_row_column(content, s.cursor()); return Err(RequirementsTxtParserError::Parser { message: format!("Expected comment or end-of-line, found '{other}'"), - location: s.cursor(), + line, + column, }); } } @@ -669,8 +675,8 @@ fn parse_requirement_and_hashes( } })?; let hashes = if has_hashes { - let hashes = parse_hashes(s)?; - eat_trailing_line(s)?; + let hashes = parse_hashes(content, s)?; + eat_trailing_line(content, s)?; hashes } else { Vec::new() @@ -679,25 +685,27 @@ fn parse_requirement_and_hashes( } /// Parse `--hash=... --hash ...` after a requirement -fn parse_hashes(s: &mut Scanner) -> Result, RequirementsTxtParserError> { +fn parse_hashes(content: &str, s: &mut Scanner) -> Result, RequirementsTxtParserError> { let mut hashes = Vec::new(); if s.eat_while("--hash").is_empty() { + let (line, column) = calculate_row_column(content, s.cursor()); return Err(RequirementsTxtParserError::Parser { message: format!( "Expected '--hash', found '{:?}'", s.eat_while(|c: char| !c.is_whitespace()) ), - location: s.cursor(), + line, + column, }); } - let hash = parse_value(s, |c: char| !c.is_whitespace())?; + let hash = parse_value(content, s, |c: char| !c.is_whitespace())?; hashes.push(hash.to_string()); loop { eat_wrappable_whitespace(s); if !s.eat_if("--hash") { break; } - let hash = parse_value(s, |c: char| !c.is_whitespace())?; + let hash = parse_value(content, s, |c: char| !c.is_whitespace())?; hashes.push(hash.to_string()); } Ok(hashes) @@ -705,6 +713,7 @@ fn parse_hashes(s: &mut Scanner) -> Result, RequirementsTxtParserErr /// In `-=` or `- value`, this parses the part after the key fn parse_value<'a, T>( + content: &str, s: &mut Scanner<'a>, while_pattern: impl Pattern, ) -> Result<&'a str, RequirementsTxtParserError> { @@ -716,9 +725,11 @@ fn parse_value<'a, T>( s.eat_whitespace(); Ok(s.eat_while(while_pattern).trim_end()) } else { + let (line, column) = calculate_row_column(content, s.cursor()); Err(RequirementsTxtParserError::Parser { message: format!("Expected '=' or whitespace, found {:?}", s.peek()), - location: s.cursor(), + line, + column, }) } } @@ -746,7 +757,8 @@ pub enum RequirementsTxtParserError { MissingEditablePrefix(String), Parser { message: String, - location: usize, + line: usize, + column: usize, }, UnsupportedRequirement { source: Pep508Error, @@ -786,9 +798,14 @@ impl RequirementsTxtParserError { Self::UnsupportedUrl(url) => Self::UnsupportedUrl(url), Self::MissingRequirementPrefix(given) => Self::MissingRequirementPrefix(given), Self::MissingEditablePrefix(given) => Self::MissingEditablePrefix(given), - Self::Parser { message, location } => Self::Parser { + Self::Parser { message, - location: location + offset, + line, + column, + } => Self::Parser { + message, + line, + column, }, Self::UnsupportedRequirement { source, start, end } => Self::UnsupportedRequirement { source, @@ -831,8 +848,12 @@ impl Display for RequirementsTxtParserError { "Requirement `{given}` looks like a directory but was passed as a package name. Did you mean `-e {given}`?" ) } - Self::Parser { message, location } => { - write!(f, "{message} at position {location}") + Self::Parser { + message, + line, + column, + } => { + write!(f, "{message} at {line}:{column}") } Self::UnsupportedRequirement { start, end, .. } => { write!(f, "Unsupported requirement in position {start} to {end}") @@ -903,10 +924,14 @@ impl Display for RequirementsTxtFileError { self.file.simplified_display(), ) } - RequirementsTxtParserError::Parser { message, location } => { + RequirementsTxtParserError::Parser { + message, + line, + column, + } => { write!( f, - "{message} in `{}` at position {location}", + "{message} at {}:{line}:{column}", self.file.simplified_display(), ) } @@ -947,6 +972,46 @@ impl From for RequirementsTxtParserError { } } +/// Calculates the column and line offset of a given cursor based on the +/// number of Unicode codepoints. +fn calculate_row_column(content: &str, position: usize) -> (usize, usize) { + let mut line = 1; + let mut column = 1; + + let mut chars = content.char_indices().peekable(); + while let Some((index, char)) = chars.next() { + if index >= position { + break; + } + match char { + '\r' => { + // If the next character is a newline, skip it. + if chars + .peek() + .map_or(false, |&(_, next_char)| next_char == '\n') + { + chars.next(); + } + + // Reset. + line += 1; + column = 1; + } + '\n' => { + // + line += 1; + column = 1; + } + // Increment column by Unicode codepoint. We don't use visual width + // (e.g., `UnicodeWidthChar::width(char).unwrap_or(0)`), since that's + // not what editors typically count. + _ => column += 1, + } + } + + (line, column) +} + #[cfg(test)] mod test { use std::path::{Path, PathBuf}; @@ -958,9 +1023,10 @@ mod test { use itertools::Itertools; use tempfile::tempdir; use test_case::test_case; + use unscanny::Scanner; use uv_fs::Simplified; - use crate::{EditableRequirement, RequirementsTxt}; + use crate::{calculate_row_column, EditableRequirement, RequirementsTxt}; fn workspace_test_data_dir() -> PathBuf { PathBuf::from("./test-data") @@ -1279,4 +1345,56 @@ mod test { Some(("../editable[", "[dev]")) ); } + + #[test] + fn parser_error_line_and_column() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let requirements_txt = temp_dir.child("requirements.txt"); + requirements_txt.write_str(indoc! {" + numpy>=1,<2 + --borken + tqdm + "})?; + + let error = RequirementsTxt::parse(requirements_txt.path(), temp_dir.path()).unwrap_err(); + let errors = anyhow::Error::new(error).chain().join("\n"); + + let requirement_txt = + regex::escape(&requirements_txt.path().simplified_display().to_string()); + let filters = vec![ + (requirement_txt.as_str(), ""), + (r"\\", "/"), + ]; + insta::with_settings!({ + filters => filters + }, { + insta::assert_display_snapshot!(errors, @r###" + Unexpected '-', expected '-c', '-e', '-r' or the start of a requirement at :2:3 + "###); + }); + + Ok(()) + } + + #[test_case("numpy>=1,<2\n @-borken\ntqdm", "2:4"; "ASCII Character with LF")] + #[test_case("numpy>=1,<2\r\n #-borken\ntqdm", "2:4"; "ASCII Character with CRLF")] + #[test_case("numpy>=1,<2\n \n-borken\ntqdm", "3:1"; "ASCII Character LF then LF")] + #[test_case("numpy>=1,<2\n \r-borken\ntqdm", "3:1"; "ASCII Character LF then CR but no LF")] + #[test_case("numpy>=1,<2\n \r\n-borken\ntqdm", "3:1"; "ASCII Character LF then CRLF")] + #[test_case("numpy>=1,<2\n 🚀-borken\ntqdm", "2:4"; "Emoji (Wide) Character")] + #[test_case("numpy>=1,<2\n 中-borken\ntqdm", "2:4"; "Fullwidth character")] + #[test_case("numpy>=1,<2\n e\u{0301}-borken\ntqdm", "2:5"; "Two codepoints")] + #[test_case("numpy>=1,<2\n a\u{0300}\u{0316}-borken\ntqdm", "2:6"; "Three codepoints")] + fn test_calculate_line_column_pair(input: &str, expected: &str) { + let mut s = Scanner::new(input); + // Place cursor right after the character we want to test + s.eat_until('-'); + + // Compute line/column + let (line, column) = calculate_row_column(input, s.cursor()); + let line_column = format!("{line}:{column}"); + + // Assert line and columns are expected + assert_eq!(line_column, expected, "Issues with input: {input}"); + } }