Linear Locator (#46)

This commit is contained in:
Jeong, YunWon 2023-06-01 13:53:31 +09:00 committed by GitHub
parent fdec727f80
commit 5e9e8a7589
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 1080 additions and 20 deletions

View file

@ -4,7 +4,7 @@ pub mod wrapper;
pub use py_ast::{init, PyNode, ToPyAst};
use pyo3::prelude::*;
use rustpython_parser::ast::{source_code::SourceLocator, Fold};
use rustpython_parser::ast::{source_code::LinearLocator, Fold};
#[pyfunction]
#[pyo3(signature = (source, filename="<unknown>", *, type_comments=false, locate=true))]
@ -21,7 +21,7 @@ pub fn parse<'py>(
let parsed = rustpython_parser::parse(source, rustpython_parser::Mode::Module, filename)
.map_err(|e| PyErr::new::<pyo3::exceptions::PySyntaxError, _>(e.to_string()))?;
if locate {
let parsed = SourceLocator::new(source).fold(parsed).unwrap();
let parsed = LinearLocator::new(source).fold(parsed).unwrap();
parsed.module().unwrap().to_py_ast(py)
} else {
parsed.module().unwrap().to_py_ast(py)

View file

@ -1125,10 +1125,13 @@ class LocatedDefVisitor(EmitVisitor):
self.emit_located_impl(variant_info)
if not info.no_cfg(self.type_info):
self.emit('#[cfg(feature = "all-nodes-with-ranges")]', 0)
cfg = '#[cfg(feature = "all-nodes-with-ranges")]'
else:
cfg = ''
self.emit(
f"""
{cfg}
impl Located for {info.full_type_name} {{
fn range(&self) -> SourceRange {{
match self {{
@ -1136,6 +1139,14 @@ class LocatedDefVisitor(EmitVisitor):
}}
}}
}}
{cfg}
impl LocatedMut for {info.full_type_name} {{
fn range_mut(&mut self) -> &mut SourceRange {{
match self {{
{sum_match_arms.replace('range()', 'range_mut()')}
}}
}}
}}
""".lstrip(),
0,
)
@ -1157,15 +1168,24 @@ class LocatedDefVisitor(EmitVisitor):
def emit_located_impl(self, info):
if not info.no_cfg(self.type_info):
self.emit('#[cfg(feature = "all-nodes-with-ranges")]', 0)
cfg = '#[cfg(feature = "all-nodes-with-ranges")]'
else:
cfg = ''
self.emit(
f"""
{cfg}
impl Located for {info.full_type_name} {{
fn range(&self) -> SourceRange {{
self.range
}}
}}
{cfg}
impl LocatedMut for {info.full_type_name} {{
fn range_mut(&mut self) -> &mut SourceRange {{
&mut self.range
}}
}}
""",
0,
)

File diff suppressed because it is too large Load diff

View file

@ -13,6 +13,10 @@ pub trait Located {
}
}
/// Extension of [`Located`] for nodes whose source range can be modified in place.
pub trait LocatedMut: Located {
/// Returns a mutable reference to this node's [`SourceRange`].
fn range_mut(&mut self) -> &mut SourceRange;
}
pub type Suite = Vec<Stmt>;
pub use crate::builtin::*;

View file

@ -1,9 +1,11 @@
use crate::Fold;
use rustpython_parser_core::{
source_code::{SourceLocation, SourceLocator, SourceRange},
source_code::{LinearLocator, RandomLocator, SourceLocation, SourceRange},
text_size::TextRange,
};
use std::{convert::Infallible, unreachable};
impl crate::fold::Fold<TextRange> for SourceLocator<'_> {
impl crate::fold::Fold<TextRange> for RandomLocator<'_> {
type TargetU = SourceRange;
type Error = std::convert::Infallible;
type UserContext = SourceLocation;
@ -21,3 +23,266 @@ impl crate::fold::Fold<TextRange> for SourceLocator<'_> {
Ok((start..end).into())
}
}
/// Converts a joined string (f-string) node from byte ranges to source ranges.
///
/// The joined string itself and every direct part of it receive the same,
/// precomputed `location`. The inner expression of a formatted value is folded
/// through `locator`; a format spec that is itself a joined string is handled
/// recursively with the same `location`, any other format spec is folded
/// through `locator`.
///
/// # Panics
///
/// Panics if a part is neither a constant nor a formatted value.
fn linear_locate_expr_joined_str(
    locator: &mut LinearLocator<'_>,
    node: crate::ExprJoinedStr<TextRange>,
    location: SourceRange,
) -> Result<crate::ExprJoinedStr<SourceRange>, Infallible> {
    let mut parts = Vec::with_capacity(node.values.len());
    for part in node.values {
        let converted = match part {
            crate::Expr::Constant(crate::ExprConstant { value, kind, .. }) => {
                crate::Expr::Constant(crate::ExprConstant {
                    range: location,
                    value,
                    kind,
                })
            }
            crate::Expr::FormattedValue(crate::ExprFormattedValue {
                value,
                conversion,
                format_spec,
                ..
            }) => {
                // The inner expression is folded before the format spec.
                let value = locator.fold(value)?;
                let format_spec = match format_spec {
                    None => None,
                    Some(spec) => {
                        let spec = match *spec {
                            crate::Expr::JoinedStr(inner) => crate::Expr::JoinedStr(
                                linear_locate_expr_joined_str(locator, inner, location)?,
                            ),
                            other => locator.fold(other)?,
                        };
                        Some(Box::new(spec))
                    }
                };
                crate::Expr::FormattedValue(crate::ExprFormattedValue {
                    range: location,
                    value,
                    conversion,
                    format_spec,
                })
            }
            _ => unreachable!("missing expr type for joined_str?"),
        };
        parts.push(converted);
    }
    Ok(crate::ExprJoinedStr {
        range: location,
        values: parts,
    })
}
// Folds an AST annotated with byte ranges (`TextRange`) into one annotated with
// line/column ranges (`SourceRange`) using a `LinearLocator`.
//
// `LinearLocator::locate` advances an internal cursor and debug-asserts that the
// requested offset is not behind it, so the overrides below pick the order in
// which child nodes are folded explicitly.
impl crate::fold::Fold<TextRange> for LinearLocator<'_> {
type TargetU = SourceRange;
type Error = std::convert::Infallible;
type UserContext = SourceLocation;
// Resolves the start of the range (advancing the cursor) and hands it to
// `map_user` as context.
fn will_map_user(&mut self, user: &TextRange) -> Self::UserContext {
self.locate(user.start())
}
// Resolves the end of the range (advancing the cursor) and combines it with
// the previously resolved start.
fn map_user(
&mut self,
user: TextRange,
start: Self::UserContext,
) -> Result<Self::TargetU, Self::Error> {
let end = self.locate(user.end());
Ok((start..end).into())
}
// Folds keys and values pairwise (key 0, value 0, key 1, value 1, ...)
// instead of all keys followed by all values.
// NOTE(review): presumably because that is the order they appear in source.
// Panics if `keys` and `values` differ in length.
fn fold_expr_dict(
&mut self,
node: crate::ExprDict<TextRange>,
) -> Result<crate::ExprDict<Self::TargetU>, Self::Error> {
let crate::ExprDict {
range,
keys,
values,
} = node;
let context = self.will_map_user(&range);
assert_eq!(keys.len(), values.len());
let mut located_keys = Vec::with_capacity(keys.len());
let mut located_values = Vec::with_capacity(values.len());
for (key, value) in keys.into_iter().zip(values.into_iter()) {
located_keys.push(self.fold(key)?);
located_values.push(self.fold(value)?);
}
let range = self.map_user(range, context)?;
Ok(crate::ExprDict {
range,
keys: located_keys,
values: located_values,
})
}
// Folds `body` before `test`, then `orelse`.
// NOTE(review): presumably mirrors the textual order `body if test else orelse`.
fn fold_expr_if_exp(
&mut self,
node: crate::ExprIfExp<TextRange>,
) -> Result<crate::ExprIfExp<Self::TargetU>, Self::Error> {
let crate::ExprIfExp {
range,
test,
body,
orelse,
} = node;
let context = self.will_map_user(&range);
let body = self.fold(body)?;
let test = self.fold(test)?;
let orelse = self.fold(orelse)?;
let range = self.map_user(range, context)?;
Ok(crate::ExprIfExp {
range,
test,
body,
orelse,
})
}
// Folds the decorators before resolving the start of the class range, then
// name, bases, keywords and body.
// NOTE(review): presumably decorators precede the statement's recorded start
// offset -- confirm against the parser.
fn fold_stmt_class_def(
&mut self,
node: crate::StmtClassDef<TextRange>,
) -> Result<crate::StmtClassDef<Self::TargetU>, Self::Error> {
let crate::StmtClassDef {
name,
bases,
keywords,
body,
decorator_list,
range,
} = node;
let decorator_list = self.fold(decorator_list)?;
let context = self.will_map_user(&range);
let name = self.fold(name)?;
let bases = self.fold(bases)?;
let keywords = self.fold(keywords)?;
let body = self.fold(body)?;
let range = self.map_user(range, context)?;
Ok(crate::StmtClassDef {
name,
bases,
keywords,
body,
decorator_list,
range,
})
}
// Folds the decorators before resolving the start of the function range, then
// name, args, returns, body and type comment, in that order.
// NOTE(review): presumably decorators precede the statement's recorded start
// offset -- confirm against the parser.
fn fold_stmt_function_def(
&mut self,
node: crate::StmtFunctionDef<TextRange>,
) -> Result<crate::StmtFunctionDef<Self::TargetU>, Self::Error> {
let crate::StmtFunctionDef {
name,
args,
body,
decorator_list,
returns,
type_comment,
range,
} = node;
let decorator_list = self.fold(decorator_list)?;
let context = self.will_map_user(&range);
let name = self.fold(name)?;
let args: Box<crate::Arguments<SourceRange>> = self.fold(args)?;
let returns = self.fold(returns)?;
let body = self.fold(body)?;
let type_comment = self.fold(type_comment)?;
let range = self.map_user(range, context)?;
Ok(crate::StmtFunctionDef {
name,
args,
body,
decorator_list,
returns,
type_comment,
range,
})
}
// Same folding order as `fold_stmt_function_def`, for `async def`.
fn fold_stmt_async_function_def(
&mut self,
node: crate::StmtAsyncFunctionDef<TextRange>,
) -> Result<crate::StmtAsyncFunctionDef<Self::TargetU>, Self::Error> {
let crate::StmtAsyncFunctionDef {
name,
args,
body,
decorator_list,
returns,
type_comment,
range,
} = node;
let decorator_list = self.fold(decorator_list)?;
let context = self.will_map_user(&range);
let name = self.fold(name)?;
let args: Box<crate::Arguments<SourceRange>> = self.fold(args)?;
let returns = self.fold(returns)?;
let body = self.fold(body)?;
let type_comment = self.fold(type_comment)?;
let range = self.map_user(range, context)?;
Ok(crate::StmtAsyncFunctionDef {
name,
args,
body,
decorator_list,
returns,
type_comment,
range,
})
}
// Resolves the range of the whole joined string once and lets
// `linear_locate_expr_joined_str` reuse it for every part. The start is
// resolved with `locate` (stored in the locator state); the end with
// `locate_only`, which does not store anything, so the cursor stays at the
// start while the inner expressions are folded.
fn fold_expr_joined_str(
&mut self,
node: crate::ExprJoinedStr<TextRange>,
) -> Result<crate::ExprJoinedStr<Self::TargetU>, Self::Error> {
let start = self.locate(node.range.start());
let end = self.locate_only(node.range.end());
let location = SourceRange::new(start, end);
linear_locate_expr_joined_str(self, node, location)
}
// Folds `func`, then `keywords` through `LinearLookaheadLocator` (which
// resolves ranges with `locate_only` and therefore leaves the cursor
// untouched), then `args` with the regular locator.
// NOTE(review): presumably because keyword and positional arguments are
// stored separately and need not be in source order relative to each other.
fn fold_expr_call(
&mut self,
node: crate::ExprCall<TextRange>,
) -> Result<crate::ExprCall<Self::TargetU>, Self::Error> {
let crate::ExprCall {
range,
func,
args,
keywords,
} = node;
let context = self.will_map_user(&range);
let func = self.fold(func)?;
let keywords = LinearLookaheadLocator(self).fold(keywords)?;
let args = self.fold(args)?;
let range = self.map_user(range, context)?;
Ok(crate::ExprCall {
range,
func,
args,
keywords,
})
}
}
/// Wrapper around a mutably borrowed [`LinearLocator`] whose `Fold`
/// implementation resolves ranges with `locate_only`, i.e. without storing a
/// new locator state.
struct LinearLookaheadLocator<'a, 'b>(&'b mut LinearLocator<'a>);
impl crate::fold::Fold<TextRange> for LinearLookaheadLocator<'_, '_> {
type TargetU = SourceRange;
type Error = std::convert::Infallible;
type UserContext = SourceLocation;
fn will_map_user(&mut self, user: &TextRange) -> Self::UserContext {
self.0.locate_only(user.start())
}
fn map_user(
&mut self,
user: TextRange,
start: Self::UserContext,
) -> Result<Self::TargetU, Self::Error> {
let end = self.0.locate_only(user.end());
Ok((start..end).into())
}
}

View file

@ -1,9 +1,10 @@
// re-export our public interface
use crate::text_size::{TextLen, TextSize};
pub use ruff_source_location::*;
pub type LineNumber = OneIndexed;
#[derive(Debug, Copy, Clone)]
#[derive(Debug, Copy, Clone, Default)]
pub struct SourceRange {
pub start: SourceLocation,
pub end: Option<SourceLocation>,
@ -31,12 +32,12 @@ impl From<std::ops::Range<SourceLocation>> for SourceRange {
}
/// Converts source code byte-offset to Python convention line and column numbers.
pub struct SourceLocator<'a> {
pub struct RandomLocator<'a> {
pub source: &'a str,
index: LineIndex,
}
impl<'a> SourceLocator<'a> {
impl<'a> RandomLocator<'a> {
#[inline]
pub fn new(source: &'a str) -> Self {
let index = LineIndex::from_source_text(source);
@ -65,6 +66,182 @@ impl<'a> SourceLocator<'a> {
}
}
/// Converts source code byte-offsets to Python convention line and column numbers,
/// remembering the current line so that later lookups continue from it.
///
/// [`LinearLocator::locate`] debug-asserts that offsets are requested in
/// non-decreasing order.
pub struct LinearLocator<'a> {
/// The source text the offsets refer to.
pub source: &'a str,
// Bookkeeping for the current line and the last located offset.
state: LinearLocatorState,
// Debug builds only: line index used to cross-check every computed location.
#[cfg(debug_assertions)]
index: LineIndex,
}
/// Position bookkeeping of a [`LinearLocator`]: the line currently being
/// scanned and the last offset that was located.
struct LinearLocatorState {
// Byte offset at which the current line starts (after a leading BOM for the
// first line).
line_start: TextSize,
// Byte offset just past the `\n` that ends the current line, or `None` when
// no further `\n` exists.
line_end: Option<TextSize>,
// One-based number of the current line.
line_number: OneIndexed,
// Last offset stored by `LinearLocator::locate`.
cursor: TextSize,
// Whether the current line is pure ASCII, in which case a byte offset within
// the line equals the character column.
is_ascii: bool,
}
impl LinearLocatorState {
    /// Builds the state for the first line of `source`.
    ///
    /// A leading byte-order mark is skipped, so both the line start and the
    /// cursor begin right after it.
    fn init(source: &str) -> Self {
        let line_start = if source.starts_with('\u{feff}') {
            TextSize::default() + '\u{feff}'.text_len()
        } else {
            TextSize::default()
        };
        let (line_end, is_ascii) = match source.find('\n') {
            // The line ends just past its terminating newline.
            Some(nl) => (Some(TextSize::new(nl as u32 + 1)), source[..nl].is_ascii()),
            None => (None, source.is_ascii()),
        };
        Self {
            line_start,
            line_end,
            line_number: OneIndexed::MIN,
            cursor: line_start,
            is_ascii,
        }
    }

    /// Returns the start offset of the following line when `next_offset` lies
    /// at or beyond the end of the current line, and `None` while it still
    /// falls inside the current line (or when there is no following line).
    fn new_line_start(&self, next_offset: TextSize) -> Option<TextSize> {
        self.line_end
            .filter(|&following_start| following_start <= next_offset)
    }
}
impl<'a> LinearLocator<'a> {
// nl = newline
/// Creates a locator positioned at the beginning of `source`.
///
/// In debug builds a `LineIndex` is also built; `locate_inner` uses it to
/// cross-check every result.
#[inline]
pub fn new(source: &'a str) -> Self {
let state = LinearLocatorState::init(source);
Self {
source,
state,
#[cfg(debug_assertions)]
index: LineIndex::from_source_text(source),
}
}
/// Resolves `offset` to a line/column location and remembers it as the new
/// cursor position.
///
/// Debug builds assert that `offset` is not behind the current cursor.
pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
debug_assert!(
self.state.cursor <= offset,
"{:?} -> {:?} {}",
self.state.cursor,
offset,
&self.source[offset.to_usize()..self.state.cursor.to_usize()]
);
let (column, new_state) = self.locate_inner(offset);
// Either switch to the freshly computed line state, or stay on the current
// line and only move the cursor.
if let Some(state) = new_state {
self.state = state;
} else {
self.state.cursor = offset;
}
SourceLocation {
row: self.state.line_number,
column,
}
}
/// Resolves `offset` to a line/column location like [`Self::locate`], but
/// discards the computed state, leaving the cursor and current line unchanged.
pub fn locate_only(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
let (column, new_state) = self.locate_inner(offset);
let state = new_state.as_ref().unwrap_or(&self.state);
SourceLocation {
row: state.line_number,
column,
}
}
// Computes the one-based column of `offset` and, when `offset` lies at or
// beyond the end of the current line, the state describing the line that
// contains `offset`. `self.state` is not modified here; callers decide
// whether to store the returned state.
fn locate_inner(
&mut self,
offset: crate::text_size::TextSize,
) -> (OneIndexed, Option<LinearLocatorState>) {
let (column, new_state) = if let Some(new_line_start) = self.state.new_line_start(offset) {
// not fit in current line
// `focused` is the text between the start of the next line and `offset`.
let focused = &self.source[new_line_start.to_usize()..offset.to_usize()];
let (lines, line_start, column) = if let Some(last_newline) = focused.rfind('\n') {
// `offset` is more than one line ahead: count the newlines between the
// cursor and the last newline before `offset`, plus that last newline.
let last_newline = new_line_start.to_usize() + last_newline;
let lines = self.source[self.state.cursor.to_usize()..last_newline]
.matches('\n')
.count() as u32
+ 1; // TODO: \r
let line_start = last_newline as u32 + 1;
let column = offset.to_u32() - line_start;
(lines, line_start, column)
} else {
// `offset` is on the line directly following the current one.
let column = (offset - new_line_start).to_u32();
(1, new_line_start.to_u32(), column)
};
let line_number = self.state.line_number.saturating_add(lines);
// Find where the target line ends and whether it is pure ASCII.
let (line_end, is_ascii) =
if let Some(newline) = self.source[line_start as usize..].find('\n') {
let newline = line_start as usize + newline;
debug_assert_eq!(&self.source[newline..][..1], "\n");
let is_ascii = self.source[line_start as usize..newline].is_ascii();
(Some(TextSize::new(newline as u32 + 1)), is_ascii)
} else {
let is_ascii = self.source[line_start as usize..].is_ascii();
(None, is_ascii)
};
let line_start = TextSize::new(line_start);
let state = LinearLocatorState {
line_start,
line_end,
line_number,
cursor: offset,
is_ascii,
};
(column, Some(state))
} else {
// Still on the current line: the column is the byte distance from its start.
let column = (offset - self.state.line_start).to_u32();
(column, None)
};
let state = new_state.as_ref().unwrap_or(&self.state);
// `column` is a byte distance so far; on a non-ASCII line convert it to a
// character count.
let column = if state.is_ascii {
column
} else {
self.source[state.line_start.to_usize()..][..column as usize]
.chars()
.count() as u32
};
let column = OneIndexed::from_zero_indexed(column);
// Debug builds only: verify the result against the `LineIndex`-based lookup.
#[cfg(debug_assertions)]
{
let location = SourceLocation {
row: state.line_number,
column,
};
let source_code = SourceCode::new(self.source, &self.index);
assert_eq!(
location,
source_code.source_location(offset),
"input: {} -> {} {}",
self.state.cursor.to_usize(),
offset.to_usize(),
&self.source[self.state.cursor.to_usize()..offset.to_usize()]
);
}
(column, new_state)
}
/// Converts an offset-based error into a [`LocatedError`] by resolving
/// `base.offset` with [`Self::locate`] (which moves the cursor) and converting
/// the inner error with `Into`.
pub fn locate_error<T, U>(&mut self, base: crate::error::BaseError<T>) -> LocatedError<U>
where
T: Into<U>,
{
let location = self.locate(base.offset);
LocatedError {
error: base.error.into(),
location: Some(location),
source_path: base.source_path,
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedError<T> {
pub error: T,
@ -124,3 +301,36 @@ where
Some(&self.error)
}
}
/// Checks that `LinearLocator` and `RandomLocator` both resolve a series of
/// increasing byte offsets to the expected one-based (row, column) pairs,
/// including a jump over an empty line and a lookup inside a non-ASCII line.
#[test]
fn test_linear_locator() {
    // The fixture is spelled out with explicit escapes so that the empty line
    // and the multi-byte line cannot be lost to whitespace or encoding
    // normalisation; the offsets probed below depend on both.
    let source = concat!(
        "123456789\n",                         // line 1: bytes 0..=9
        "abcdefghi\n",                         // line 2: bytes 10..=19
        "\n",                                  // line 3: byte 20 (empty line)
        "\u{c720}\u{b2c8}\u{cf54}\u{b4dc}\n", // line 4: starts at byte 21, 3 bytes per char
    );
    let mut locator = LinearLocator::new(source);
    let mut random_locator = RandomLocator::new(source);
    let mut test = |(row, col), offset| {
        let input = TextSize::from(offset);
        let expected: SourceLocation = SourceLocation {
            row: OneIndexed::new(row).unwrap(),
            column: OneIndexed::new(col).unwrap(),
        };
        let actual = locator.locate(input);
        let actual2 = random_locator.locate(input);
        assert_eq!(expected, actual);
        assert_eq!(expected, actual2);
    };
    test((1, 1), 0);
    test((1, 6), 5);
    test((1, 9), 8);
    test((2, 1), 10);
    // Skips the empty third line in a single step.
    test((4, 1), 21);
    // Six bytes into the non-ASCII line are two characters, hence column 3.
    test((4, 3), 27);
}