mirror of
https://github.com/RustPython/Parser.git
synced 2025-08-27 05:44:52 +00:00
Fix LinearLocator \r handling (#80)
This commit is contained in:
parent
a2e3209c42
commit
b2f95e2848
7 changed files with 42 additions and 438 deletions
|
@ -13,5 +13,5 @@ rust-version = { workspace = true }
|
|||
[dependencies]
|
||||
ruff_text_size = { path = "../ruff_text_size" }
|
||||
|
||||
memchr = "2.5.0"
|
||||
memchr = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
mod line_index;
|
||||
// mod locator;
|
||||
// pub mod newline;
|
||||
pub mod newlines;
|
||||
|
||||
pub use crate::line_index::{LineIndex, OneIndexed};
|
||||
// TODO: RUSTPYTHON; import it later
|
||||
|
|
|
@ -1,409 +0,0 @@
|
|||
//! Struct used to efficiently slice source code at (row, column) Locations.
|
||||
|
||||
use crate::newlines::find_newline;
|
||||
use crate::{LineIndex, OneIndexed, SourceCode, SourceLocation};
|
||||
use memchr::{memchr2, memrchr2};
|
||||
use once_cell::unsync::OnceCell;
|
||||
use ruff_text_size::{TextLen, TextRange, TextSize};
|
||||
use std::ops::Add;
|
||||
|
||||
/// Pairs a borrowed source text with a line index that is only built on demand.
///
/// * `contents` is the complete source text that every offset and range passed to the
///   methods is resolved against.
/// * `index` starts out empty (see `new`) and is filled by `to_index` the first time a
///   [`LineIndex`] is needed.
pub struct Locator<'a> {
|
||||
contents: &'a str,
|
||||
index: OnceCell<LineIndex>,
|
||||
}
|
||||
|
||||
impl<'a> Locator<'a> {
|
||||
pub const fn new(contents: &'a str) -> Self {
|
||||
Self {
|
||||
contents,
|
||||
index: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[deprecated(
|
||||
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
|
||||
)]
|
||||
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
|
||||
self.to_index().line_index(offset)
|
||||
}
|
||||
|
||||
#[deprecated(
|
||||
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
|
||||
)]
|
||||
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
|
||||
self.to_source_code().source_location(offset)
|
||||
}
|
||||
|
||||
fn to_index(&self) -> &LineIndex {
|
||||
self.index
|
||||
.get_or_init(|| LineIndex::from_source_text(self.contents))
|
||||
}
|
||||
|
||||
pub fn line_index(&self) -> Option<&LineIndex> {
|
||||
self.index.get()
|
||||
}
|
||||
|
||||
pub fn to_source_code(&self) -> SourceCode {
|
||||
SourceCode {
|
||||
index: self.to_index(),
|
||||
text: self.contents,
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the start position of the line of `offset`.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::TextSize;
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\rthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
|
||||
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
|
||||
///
|
||||
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
|
||||
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line_start(&self, offset: TextSize) -> TextSize {
|
||||
let bytes = self.contents[TextRange::up_to(offset)].as_bytes();
|
||||
if let Some(index) = memrchr2(b'\n', b'\r', bytes) {
|
||||
// SAFETY: Safe because `index < offset`
|
||||
TextSize::try_from(index).unwrap().add(TextSize::from(1))
|
||||
} else {
|
||||
TextSize::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
|
||||
offset == TextSize::from(0)
|
||||
|| self.contents[TextRange::up_to(offset)].ends_with(['\n', '\r'])
|
||||
}
|
||||
|
||||
/// Computes the offset that is right after the newline character that ends `offset`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
|
||||
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
|
||||
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If `offset` is passed the end of the content.
|
||||
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
|
||||
let slice = &self.contents[usize::from(offset)..];
|
||||
if let Some((index, line_ending)) = find_newline(slice) {
|
||||
offset + TextSize::try_from(index).unwrap() + line_ending.text_len()
|
||||
} else {
|
||||
self.contents.text_len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the offset that is right before the newline character that ends `offset`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
|
||||
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
|
||||
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If `offset` is passed the end of the content.
|
||||
pub fn line_end(&self, offset: TextSize) -> TextSize {
|
||||
let slice = &self.contents[usize::from(offset)..];
|
||||
if let Some(index) = memchr2(b'\n', b'\r', slice.as_bytes()) {
|
||||
offset + TextSize::try_from(index).unwrap()
|
||||
} else {
|
||||
self.contents.text_len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the range of this `offset`s line.
|
||||
///
|
||||
/// The range starts at the beginning of the line and goes up to, and including, the new line character
|
||||
/// at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
|
||||
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
|
||||
TextRange::new(self.line_start(offset), self.full_line_end(offset))
|
||||
}
|
||||
|
||||
/// Computes the range of this `offset`s line ending before the newline character.
|
||||
///
|
||||
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
|
||||
/// at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
|
||||
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
|
||||
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line_range(&self, offset: TextSize) -> TextRange {
|
||||
TextRange::new(self.line_start(offset), self.line_end(offset))
|
||||
}
|
||||
|
||||
/// Returns the text of the `offset`'s line.
|
||||
///
|
||||
/// The line includes the newline characters at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
|
||||
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
|
||||
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn full_line(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[self.full_line_range(offset)]
|
||||
}
|
||||
|
||||
/// Returns the text of the `offset`'s line.
|
||||
///
|
||||
/// Excludes the newline characters at the end of the line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
|
||||
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
|
||||
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `offset` is out of bounds.
|
||||
pub fn line(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[self.line_range(offset)]
|
||||
}
|
||||
|
||||
/// Computes the range of all lines that this `range` covers.
|
||||
///
|
||||
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
|
||||
/// at the end of `range.ends()`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(11))
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(24))
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
|
||||
TextRange::new(
|
||||
self.line_start(range.start()),
|
||||
self.full_line_end(range.end()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the range of all lines that this `range` covers.
|
||||
///
|
||||
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
|
||||
/// at the end of `range.end()`'s line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(10))
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// TextRange::new(TextSize::from(0), TextSize::from(22))
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn lines_range(&self, range: TextRange) -> TextRange {
|
||||
TextRange::new(self.line_start(range.start()), self.line_end(range.end()))
|
||||
}
|
||||
|
||||
/// Returns true if the text of `range` contains any line break.
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert!(
|
||||
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// );
|
||||
/// assert!(
|
||||
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the `range` is out of bounds.
|
||||
pub fn contains_line_break(&self, range: TextRange) -> bool {
|
||||
let text = &self.contents[range];
|
||||
text.contains(['\n', '\r'])
|
||||
}
|
||||
|
||||
/// Returns the text of all lines that include `range`.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// "First line"
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// "First line\nsecond line"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn lines(&self, range: TextRange) -> &'a str {
|
||||
&self.contents[self.lines_range(range)]
|
||||
}
|
||||
|
||||
/// Returns the text of all lines that include `range`.
|
||||
///
|
||||
/// Includes the newline characters of the last line.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{TextRange, TextSize};
|
||||
/// # use ruff_python_ast::source_code::Locator;
|
||||
///
|
||||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||||
/// "First line\n"
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||||
/// "First line\nsecond line\r\n"
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
pub fn full_lines(&self, range: TextRange) -> &'a str {
|
||||
&self.contents[self.full_lines_range(range)]
|
||||
}
|
||||
|
||||
/// Take the source code up to the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn up_to(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[TextRange::up_to(offset)]
|
||||
}
|
||||
|
||||
/// Take the source code after the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn after(&self, offset: TextSize) -> &'a str {
|
||||
&self.contents[usize::from(offset)..]
|
||||
}
|
||||
|
||||
/// Take the source code between the given [`TextRange`].
|
||||
#[inline]
|
||||
pub fn slice(&self, range: TextRange) -> &'a str {
|
||||
&self.contents[range]
|
||||
}
|
||||
|
||||
/// Return the underlying source code.
|
||||
pub fn contents(&self) -> &'a str {
|
||||
self.contents
|
||||
}
|
||||
|
||||
/// Return the number of bytes in the source code.
|
||||
pub const fn len(&self) -> usize {
|
||||
self.contents.len()
|
||||
}
|
||||
|
||||
pub fn text_len(&self) -> TextSize {
|
||||
self.contents.text_len()
|
||||
}
|
||||
|
||||
/// Return `true` if the source code is empty.
|
||||
pub const fn is_empty(&self) -> bool {
|
||||
self.contents.is_empty()
|
||||
}
|
||||
}
|
|
@ -21,7 +21,7 @@ impl StrExt for str {
|
|||
///
|
||||
/// ```rust
|
||||
/// # use ruff_text_size::TextSize;
|
||||
/// # use ruff_python_ast::newlines::{Line, UniversalNewlineIterator};
|
||||
/// # use ruff_source_location::newlines::{Line, UniversalNewlineIterator};
|
||||
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
|
||||
///
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue