mirror of
https://github.com/RustPython/Parser.git
synced 2025-08-30 07:08:14 +00:00
Linear Locator (#46)
This commit is contained in:
parent
fdec727f80
commit
5e9e8a7589
6 changed files with 1080 additions and 20 deletions
|
@ -1,9 +1,10 @@
|
|||
// re-export our public interface
|
||||
use crate::text_size::{TextLen, TextSize};
|
||||
pub use ruff_source_location::*;
|
||||
|
||||
/// Alias for [`OneIndexed`], named for values that denote a line number.
pub type LineNumber = OneIndexed;
|
||||
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[derive(Debug, Copy, Clone, Default)]
|
||||
pub struct SourceRange {
|
||||
pub start: SourceLocation,
|
||||
pub end: Option<SourceLocation>,
|
||||
|
@ -31,12 +32,12 @@ impl From<std::ops::Range<SourceLocation>> for SourceRange {
|
|||
}
|
||||
|
||||
/// Converts source code byte-offset to Python convention line and column numbers.
|
||||
pub struct SourceLocator<'a> {
|
||||
pub struct RandomLocator<'a> {
|
||||
pub source: &'a str,
|
||||
index: LineIndex,
|
||||
}
|
||||
|
||||
impl<'a> SourceLocator<'a> {
|
||||
impl<'a> RandomLocator<'a> {
|
||||
#[inline]
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let index = LineIndex::from_source_text(source);
|
||||
|
@ -65,6 +66,182 @@ impl<'a> SourceLocator<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts source code byte-offset to Python convention line and column numbers.
|
||||
pub struct LinearLocator<'a> {
|
||||
pub source: &'a str,
|
||||
state: LinearLocatorState,
|
||||
#[cfg(debug_assertions)]
|
||||
index: LineIndex,
|
||||
}
|
||||
|
||||
struct LinearLocatorState {
|
||||
line_start: TextSize,
|
||||
line_end: Option<TextSize>,
|
||||
line_number: OneIndexed,
|
||||
cursor: TextSize,
|
||||
is_ascii: bool,
|
||||
}
|
||||
|
||||
impl LinearLocatorState {
|
||||
fn init(source: &str) -> Self {
|
||||
let mut line_start = TextSize::default();
|
||||
if source.starts_with('\u{feff}') {
|
||||
line_start += '\u{feff}'.text_len();
|
||||
}
|
||||
let (line_end, is_ascii) = if let Some(nl) = source.find('\n') {
|
||||
let is_ascii = source[..nl].is_ascii();
|
||||
(Some(TextSize::new(nl as u32 + 1)), is_ascii)
|
||||
} else {
|
||||
(None, source.is_ascii())
|
||||
};
|
||||
let line_number = OneIndexed::MIN;
|
||||
Self {
|
||||
line_start,
|
||||
line_end,
|
||||
line_number,
|
||||
cursor: line_start,
|
||||
is_ascii,
|
||||
}
|
||||
}
|
||||
|
||||
fn new_line_start(&self, next_offset: TextSize) -> Option<TextSize> {
|
||||
if let Some(new_line_start) = self.line_end {
|
||||
if new_line_start <= next_offset {
|
||||
return Some(new_line_start);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> LinearLocator<'a> {
|
||||
// nl = newline
|
||||
|
||||
#[inline]
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let state = LinearLocatorState::init(source);
|
||||
Self {
|
||||
source,
|
||||
state,
|
||||
#[cfg(debug_assertions)]
|
||||
index: LineIndex::from_source_text(source),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
|
||||
debug_assert!(
|
||||
self.state.cursor <= offset,
|
||||
"{:?} -> {:?} {}",
|
||||
self.state.cursor,
|
||||
offset,
|
||||
&self.source[offset.to_usize()..self.state.cursor.to_usize()]
|
||||
);
|
||||
let (column, new_state) = self.locate_inner(offset);
|
||||
if let Some(state) = new_state {
|
||||
self.state = state;
|
||||
} else {
|
||||
self.state.cursor = offset;
|
||||
}
|
||||
SourceLocation {
|
||||
row: self.state.line_number,
|
||||
column,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn locate_only(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
|
||||
let (column, new_state) = self.locate_inner(offset);
|
||||
let state = new_state.as_ref().unwrap_or(&self.state);
|
||||
SourceLocation {
|
||||
row: state.line_number,
|
||||
column,
|
||||
}
|
||||
}
|
||||
|
||||
fn locate_inner(
|
||||
&mut self,
|
||||
offset: crate::text_size::TextSize,
|
||||
) -> (OneIndexed, Option<LinearLocatorState>) {
|
||||
let (column, new_state) = if let Some(new_line_start) = self.state.new_line_start(offset) {
|
||||
// not fit in current line
|
||||
let focused = &self.source[new_line_start.to_usize()..offset.to_usize()];
|
||||
let (lines, line_start, column) = if let Some(last_newline) = focused.rfind('\n') {
|
||||
let last_newline = new_line_start.to_usize() + last_newline;
|
||||
let lines = self.source[self.state.cursor.to_usize()..last_newline]
|
||||
.matches('\n')
|
||||
.count() as u32
|
||||
+ 1; // TODO: \r
|
||||
let line_start = last_newline as u32 + 1;
|
||||
let column = offset.to_u32() - line_start;
|
||||
(lines, line_start, column)
|
||||
} else {
|
||||
let column = (offset - new_line_start).to_u32();
|
||||
(1, new_line_start.to_u32(), column)
|
||||
};
|
||||
let line_number = self.state.line_number.saturating_add(lines);
|
||||
let (line_end, is_ascii) =
|
||||
if let Some(newline) = self.source[line_start as usize..].find('\n') {
|
||||
let newline = line_start as usize + newline;
|
||||
debug_assert_eq!(&self.source[newline..][..1], "\n");
|
||||
let is_ascii = self.source[line_start as usize..newline].is_ascii();
|
||||
(Some(TextSize::new(newline as u32 + 1)), is_ascii)
|
||||
} else {
|
||||
let is_ascii = self.source[line_start as usize..].is_ascii();
|
||||
(None, is_ascii)
|
||||
};
|
||||
let line_start = TextSize::new(line_start);
|
||||
let state = LinearLocatorState {
|
||||
line_start,
|
||||
line_end,
|
||||
line_number,
|
||||
cursor: offset,
|
||||
is_ascii,
|
||||
};
|
||||
(column, Some(state))
|
||||
} else {
|
||||
let column = (offset - self.state.line_start).to_u32();
|
||||
(column, None)
|
||||
};
|
||||
let state = new_state.as_ref().unwrap_or(&self.state);
|
||||
let column = if state.is_ascii {
|
||||
column
|
||||
} else {
|
||||
self.source[state.line_start.to_usize()..][..column as usize]
|
||||
.chars()
|
||||
.count() as u32
|
||||
};
|
||||
let column = OneIndexed::from_zero_indexed(column);
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
let location = SourceLocation {
|
||||
row: state.line_number,
|
||||
column,
|
||||
};
|
||||
let source_code = SourceCode::new(self.source, &self.index);
|
||||
assert_eq!(
|
||||
location,
|
||||
source_code.source_location(offset),
|
||||
"input: {} -> {} {}",
|
||||
self.state.cursor.to_usize(),
|
||||
offset.to_usize(),
|
||||
&self.source[self.state.cursor.to_usize()..offset.to_usize()]
|
||||
);
|
||||
}
|
||||
(column, new_state)
|
||||
}
|
||||
|
||||
pub fn locate_error<T, U>(&mut self, base: crate::error::BaseError<T>) -> LocatedError<U>
|
||||
where
|
||||
T: Into<U>,
|
||||
{
|
||||
let location = self.locate(base.offset);
|
||||
LocatedError {
|
||||
error: base.error.into(),
|
||||
location: Some(location),
|
||||
source_path: base.source_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct LocatedError<T> {
|
||||
pub error: T,
|
||||
|
@ -124,3 +301,36 @@ where
|
|||
Some(&self.error)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that `LinearLocator` and `RandomLocator` agree with hand-computed locations,
/// including a line that contains multi-byte characters.
#[test]
fn test_linear_locator() {
    let source = r#"
123456789
abcdefghi

유니코드
"#
    .strip_prefix(char::is_whitespace)
    .unwrap();
    let mut linear = LinearLocator::new(source);
    let mut random = RandomLocator::new(source);

    // ((row, column), byte offset); offsets ascend because the linear locator is only
    // queried moving forward.
    let cases = [
        ((1, 1), 0),
        ((1, 6), 5),
        ((1, 9), 8),
        ((2, 1), 10),
        ((4, 1), 21),
        ((4, 3), 27),
    ];

    for ((row, col), offset) in cases {
        let input = TextSize::from(offset);
        let expected = SourceLocation {
            row: OneIndexed::new(row).unwrap(),
            column: OneIndexed::new(col).unwrap(),
        };
        let from_linear = linear.locate(input);
        let from_random = random.locate(input);
        assert_eq!(expected, from_linear);
        assert_eq!(expected, from_random);
    }
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue