Fix LinearLocator \r handling (#80)

This commit is contained in:
Jeong, YunWon 2023-06-02 22:35:53 +09:00 committed by Jeong YunWon
parent 5e9e8a7589
commit 00193e0798
7 changed files with 42 additions and 438 deletions

View file

@ -34,6 +34,7 @@ num-complex = "0.4.0"
num-bigint = "0.4.3"
num-traits = "0.2"
pyo3 = { version = "0.18.3" }
memchr = "2.5.0"
rand = "0.8.5"
serde = "1.0"
static_assertions = "1.1"

View file

@ -14,6 +14,7 @@ ruff_source_location = { path = "../ruff_source_location", optional = true }
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
is-macro.workspace = true
memchr.workspace = true
[features]
default = []

View file

@ -1,7 +1,11 @@
// re-export our public interface
use crate::text_size::{TextLen, TextSize};
pub use ruff_source_location::*;
use memchr::memrchr2;
pub use ruff_source_location::{
newlines::{find_newline, UniversalNewlineIterator},
LineIndex, OneIndexed, SourceCode, SourceLocation,
};
pub type LineNumber = OneIndexed;
#[derive(Debug, Copy, Clone, Default)]
@ -88,9 +92,12 @@ impl LinearLocatorState {
if source.starts_with('\u{feff}') {
line_start += '\u{feff}'.text_len();
}
let (line_end, is_ascii) = if let Some(nl) = source.find('\n') {
let is_ascii = source[..nl].is_ascii();
(Some(TextSize::new(nl as u32 + 1)), is_ascii)
let (line_end, is_ascii) = if let Some((position, line_ending)) = find_newline(source) {
let is_ascii = source[..position].is_ascii();
(
Some(TextSize::new(position as u32 + line_ending.len() as u32)),
is_ascii,
)
} else {
(None, source.is_ascii())
};
@ -164,30 +171,34 @@ impl<'a> LinearLocator<'a> {
let (column, new_state) = if let Some(new_line_start) = self.state.new_line_start(offset) {
// not fit in current line
let focused = &self.source[new_line_start.to_usize()..offset.to_usize()];
let (lines, line_start, column) = if let Some(last_newline) = focused.rfind('\n') {
let last_newline = new_line_start.to_usize() + last_newline;
let lines = self.source[self.state.cursor.to_usize()..last_newline]
.matches('\n')
.count() as u32
+ 1; // TODO: \r
let line_start = last_newline as u32 + 1;
let column = offset.to_u32() - line_start;
(lines, line_start, column)
} else {
let column = (offset - new_line_start).to_u32();
(1, new_line_start.to_u32(), column)
};
let line_number = self.state.line_number.saturating_add(lines);
let (line_end, is_ascii) =
if let Some(newline) = self.source[line_start as usize..].find('\n') {
let newline = line_start as usize + newline;
debug_assert_eq!(&self.source[newline..][..1], "\n");
let is_ascii = self.source[line_start as usize..newline].is_ascii();
(Some(TextSize::new(newline as u32 + 1)), is_ascii)
let (lines, line_start, column) =
if let Some(last_newline) = memrchr2(b'\r', b'\n', focused.as_bytes()) {
let last_newline = new_line_start.to_usize() + last_newline;
let lines = UniversalNewlineIterator::from(
&self.source[self.state.cursor.to_usize()..last_newline + 1],
)
.count();
let line_start = last_newline as u32 + 1;
let column = offset.to_u32() - line_start;
(lines as u32, line_start, column)
} else {
let is_ascii = self.source[line_start as usize..].is_ascii();
(None, is_ascii)
let column = (offset - new_line_start).to_u32();
(1, new_line_start.to_u32(), column)
};
let line_number = self.state.line_number.saturating_add(lines);
let (line_end, is_ascii) = if let Some((newline, line_ending)) =
find_newline(&self.source[line_start as usize..])
{
let newline = line_start as usize + newline;
let is_ascii = self.source[line_start as usize..newline].is_ascii();
(
Some(TextSize::new(newline as u32 + line_ending.len() as u32)),
is_ascii,
)
} else {
let is_ascii = self.source[line_start as usize..].is_ascii();
(None, is_ascii)
};
let line_start = TextSize::new(line_start);
let state = LinearLocatorState {
line_start,

View file

@ -13,5 +13,5 @@ rust-version = { workspace = true }
[dependencies]
ruff_text_size = { path = "../ruff_text_size" }
memchr = "2.5.0"
memchr = { workspace = true }
once_cell = { workspace = true }

View file

@ -1,6 +1,6 @@
mod line_index;
// mod locator;
// pub mod newline;
pub mod newlines;
pub use crate::line_index::{LineIndex, OneIndexed};
// TODO: RUSTPYTHON; import it later

View file

@ -1,409 +0,0 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use crate::newlines::find_newline;
use crate::{LineIndex, OneIndexed, SourceCode, SourceLocation};
use memchr::{memchr2, memrchr2};
use once_cell::unsync::OnceCell;
use ruff_text_size::{TextLen, TextRange, TextSize};
use std::ops::Add;
/// Pairs a borrowed source string with a lazily initialized [`LineIndex`].
///
/// The index starts out empty (see `Locator::new`) and is only filled in by
/// the methods that go through `to_index`.
pub struct Locator<'a> {
// The source text that all offsets and ranges refer to.
contents: &'a str,
// Cache for the line index; populated on demand with `OnceCell::get_or_init`.
index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
/// Creates a locator over `contents` whose line index has not been computed yet.
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
index: OnceCell::new(),
}
}
/// Returns the [`OneIndexed`] line that the line index reports for `offset`.
///
/// Goes through `to_index`, so the line index is built if it is not cached yet.
#[deprecated(
    note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
    let index = self.to_index();
    index.line_index(offset)
}
/// Returns the [`SourceLocation`] that the indexed source code reports for `offset`.
///
/// Goes through `to_source_code`, so the line index is built if it is not cached yet.
#[deprecated(
    note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
    let source_code = self.to_source_code();
    source_code.source_location(offset)
}
/// Returns the line index for the source text, building and caching it on first use.
fn to_index(&self) -> &LineIndex {
    let source = self.contents;
    self.index.get_or_init(|| LineIndex::from_source_text(source))
}
/// Returns the cached [`LineIndex`] if one has already been computed.
///
/// Unlike `to_index`, this only reads the cell and never builds the index.
pub fn line_index(&self) -> Option<&LineIndex> {
self.index.get()
}
/// Bundles the source text with its line index into a [`SourceCode`].
///
/// The line index is built first if it is not cached yet.
pub fn to_source_code(&self) -> SourceCode {
    let index = self.to_index();
    let text = self.contents;
    SourceCode { index, text }
}
/// Returns the offset at which the line containing `offset` begins.
///
/// The line start is the position right after the closest `\n` or `\r` that
/// precedes `offset`, or the start of the source when there is none.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\rthird line");
///
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
///
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_start(&self, offset: TextSize) -> TextSize {
    let preceding = self.contents[TextRange::up_to(offset)].as_bytes();
    match memrchr2(b'\n', b'\r', preceding) {
        // The conversion cannot fail: `index` lies inside a slice whose
        // length is `offset`, which already fits in a `TextSize`.
        Some(index) => TextSize::try_from(index).unwrap().add(TextSize::from(1)),
        // No line break before `offset`, so it is on the first line.
        None => TextSize::default(),
    }
}
/// Returns `true` if `offset` is zero or the text before it ends with `\n` or `\r`.
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
    if offset == TextSize::from(0) {
        return true;
    }
    let preceding = &self.contents[TextRange::up_to(offset)];
    preceding.ends_with(['\n', '\r'])
}
/// Returns the offset right after the line ending that terminates `offset`'s line.
///
/// When no line ending follows `offset`, the length of the source is returned.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
    let rest = &self.contents[usize::from(offset)..];
    match find_newline(rest) {
        // `index` is relative to `offset`; skip over the whole line ending.
        Some((index, line_ending)) => {
            offset + TextSize::try_from(index).unwrap() + line_ending.text_len()
        }
        None => self.contents.text_len(),
    }
}
/// Returns the offset right before the first `\n` or `\r` at or after `offset`.
///
/// When no such character follows `offset`, the length of the source is returned.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn line_end(&self, offset: TextSize) -> TextSize {
    let rest = self.contents[usize::from(offset)..].as_bytes();
    match memchr2(b'\n', b'\r', rest) {
        // `index` is relative to `offset`.
        Some(index) => offset + TextSize::try_from(index).unwrap(),
        None => self.contents.text_len(),
    }
}
/// Returns the range of `offset`'s line, including its trailing line ending.
///
/// The range runs from `line_start(offset)` to `full_line_end(offset)`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
    let start = self.line_start(offset);
    let end = self.full_line_end(offset);
    TextRange::new(start, end)
}
/// Returns the range of `offset`'s line, excluding its trailing line ending.
///
/// The range runs from `line_start(offset)` to `line_end(offset)`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_range(&self, offset: TextSize) -> TextRange {
    let start = self.line_start(offset);
    let end = self.line_end(offset);
    TextRange::new(start, end)
}
/// Returns the text of `offset`'s line, including its trailing line ending.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn full_line(&self, offset: TextSize) -> &'a str {
    let range = self.full_line_range(offset);
    &self.contents[range]
}
/// Returns the text of `offset`'s line, excluding its trailing line ending.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line(&self, offset: TextSize) -> &'a str {
    let range = self.line_range(offset);
    &self.contents[range]
}
/// Returns the range of all lines that `range` touches, including the last line ending.
///
/// The result starts at the beginning of `range.start()`'s line and extends up to, and
/// including, the line ending that terminates `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(11))
/// );
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(24))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
    let start = self.line_start(range.start());
    let end = self.full_line_end(range.end());
    TextRange::new(start, end)
}
/// Returns the range of all lines that `range` touches, excluding the last line ending.
///
/// The result starts at the beginning of `range.start()`'s line and extends up to, but
/// not including, the line break that terminates `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(10))
/// );
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(22))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn lines_range(&self, range: TextRange) -> TextRange {
    let start = self.line_start(range.start());
    let end = self.line_end(range.end());
    TextRange::new(start, end)
}
/// Returns `true` if the text covered by `range` contains a `\n` or `\r`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert!(
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// );
/// assert!(
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// );
/// ```
///
/// ## Panics
/// If the `range` is out of bounds.
pub fn contains_line_break(&self, range: TextRange) -> bool {
    self.contents[range].contains(['\n', '\r'])
}
/// Returns the text of all lines that `range` touches.
///
/// Excludes the line ending of the last line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line"
/// );
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn lines(&self, range: TextRange) -> &'a str {
    let covered = self.lines_range(range);
    &self.contents[covered]
}
/// Returns the text of all lines that `range` touches.
///
/// Includes the line ending of the last line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line\n"
/// );
/// assert_eq!(
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line\r\n"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn full_lines(&self, range: TextRange) -> &'a str {
    let covered = self.full_lines_range(range);
    &self.contents[covered]
}
/// Take the source code up to the given [`TextSize`].
///
/// The result is the slice of the source indexed by `TextRange::up_to(offset)`.
#[inline]
pub fn up_to(&self, offset: TextSize) -> &'a str {
&self.contents[TextRange::up_to(offset)]
}
/// Take the source code after the given [`TextSize`].
///
/// The result starts at byte `offset` and runs to the end of the source.
#[inline]
pub fn after(&self, offset: TextSize) -> &'a str {
&self.contents[usize::from(offset)..]
}
/// Take the source code covered by the given [`TextRange`].
#[inline]
pub fn slice(&self, range: TextRange) -> &'a str {
&self.contents[range]
}
/// Return the underlying source code.
///
/// The returned slice carries the locator's `'a` lifetime rather than the
/// lifetime of the `&self` borrow.
pub fn contents(&self) -> &'a str {
self.contents
}
/// Return the number of bytes in the source code.
///
/// This is the byte length of the underlying `str`, not a character count.
pub const fn len(&self) -> usize {
self.contents.len()
}
/// Return the length of the source code as a [`TextSize`].
pub fn text_len(&self) -> TextSize {
self.contents.text_len()
}
/// Return `true` if the source code is empty, i.e. has a length of zero bytes.
pub const fn is_empty(&self) -> bool {
self.contents.is_empty()
}
}

View file

@ -21,7 +21,7 @@ impl StrExt for str {
///
/// ```rust
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::newlines::{Line, UniversalNewlineIterator};
/// # use ruff_source_location::newlines::{Line, UniversalNewlineIterator};
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
///
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));