Adapt SourceLocation

This commit is contained in:
Jeong YunWon 2023-05-09 18:30:35 +09:00
parent a14e43e03a
commit 09a6afdd04
117 changed files with 1606 additions and 1676 deletions

View file

@ -14,6 +14,7 @@ num-bigint = { workspace = true }
num-complex = { workspace = true }
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
ruff_text_size = { path = "../ruff_text_size" }
ruff_python_ast = { workspace = true }
lz4_flex = "0.9.2"

View file

@ -1,7 +1,10 @@
//! Implement python as a virtual machine with bytecode. This module
//! implements bytecode structure.
use crate::{marshal, Location};
use crate::{
marshal,
source_code::{OneIndexed, SourceLocation},
};
use bitflags::bitflags;
use itertools::Itertools;
use num_bigint::BigInt;
@ -89,14 +92,14 @@ impl ConstantBag for BasicBag {
#[derive(Clone)]
pub struct CodeObject<C: Constant = ConstantData> {
pub instructions: Box<[CodeUnit]>,
pub locations: Box<[Location]>,
pub locations: Box<[SourceLocation]>,
pub flags: CodeFlags,
pub posonlyarg_count: u32,
// Number of positional-only arguments
pub arg_count: u32,
pub kwonlyarg_count: u32,
pub source_path: C::Name,
pub first_line_number: u32,
pub first_line_number: OneIndexed,
pub max_stackdepth: u32,
pub obj_name: C::Name,
// Name of the object that created this code object
@ -974,14 +977,14 @@ impl<C: Constant> CodeObject<C> {
let label_targets = self.label_targets();
let line_digits = (3).max(self.locations.last().unwrap().row.to_string().len());
let offset_digits = (4).max(self.instructions.len().to_string().len());
let mut last_line = u32::MAX;
let mut last_line = OneIndexed::MAX;
let mut arg_state = OpArgState::default();
for (offset, &instruction) in self.instructions.iter().enumerate() {
let (instruction, arg) = arg_state.get(instruction);
// optional line number
let line = self.locations[offset].row;
if line != last_line {
if last_line != u32::MAX {
if last_line != OneIndexed::MAX {
writeln!(f)?;
}
last_line = line;

View file

@ -1,4 +1,4 @@
use crate::{text_size::TextSize, Location};
use crate::{source_code::SourceLocation, text_size::TextSize};
use std::fmt::Display;
#[derive(Debug, PartialEq, Eq)]
@ -62,18 +62,23 @@ impl<T> BaseError<T> {
BaseError::from(self)
}
pub fn into_located<U>(self, locator: &str) -> LocatedError<U>
pub fn into_located<U>(self, locator: &mut super::SourceLocator) -> LocatedError<U>
where
T: Into<U>,
{
todo!()
let location = locator.locate(self.offset);
LocatedError {
error: self.error.into(),
location: Some(location),
source_path: self.source_path,
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedError<T> {
pub error: T,
pub location: Location,
pub location: Option<SourceLocation>,
pub source_path: String,
}
@ -99,6 +104,17 @@ impl<T> LocatedError<T> {
{
LocatedError::from(self)
}
/// Returns the error position as a `(row, column)` pair.
///
/// Both numbers come from `to_one_indexed()` on the stored location; when no
/// location is recorded the pair is `(0, 0)`.
pub fn python_location(&self) -> (usize, usize) {
    self.location.map_or((0, 0), |at| {
        (at.row.to_one_indexed(), at.column.to_one_indexed())
    })
}
}
impl<T> Display for LocatedError<T>
@ -106,11 +122,10 @@ where
T: std::fmt::Display,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"{} at row {} col {}",
&self.error, self.location.row, self.location.column,
)
let (row, column) = self.location.map_or((0, 0), |l| {
(l.row.to_one_indexed(), l.column.to_one_indexed())
});
write!(f, "{} at row {} col {}", &self.error, row, column,)
}
}

View file

@ -1,20 +1,41 @@
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-compiler-core/")]
mod bytecode;
// parser core
mod error;
mod location;
pub mod marshal;
mod mode;
pub use bytecode::*;
pub use error::{BaseError, LocatedError};
pub use location::{Location, LocationRange};
pub use error::BaseError;
pub use mode::Mode;
pub use ruff_text_size as text_size; // re-export mandatory and frequently accessed dependency
// FIXME: temp code
pub fn to_location(offset: &text_size::TextSize, source: &str) -> Location {
todo!()
// compiler core
mod bytecode;
pub mod marshal;
pub use bytecode::*;
pub use error::LocatedError;
pub use ruff_python_ast::source_code;
pub use ruff_python_ast::source_code::OneIndexed as LineNumber;
use source_code::{LineIndex, SourceCode, SourceLocation};
use text_size::TextSize;
/// Converts source code byte-offset to Python convention line and column numbers.
///
/// Pairs a borrowed source string with a `LineIndex` built from that same
/// string, so every `locate` call reuses the index instead of rebuilding it.
pub struct SourceLocator<'a> {
/// Source text that offsets handed to `locate` are resolved against.
pub source: &'a str,
/// Line index computed from `source` when the locator is created.
index: LineIndex,
}
impl<'a> SourceLocator<'a> {
/// Creates a locator for `source`, building its line index up front with
/// `LineIndex::from_source_text`.
#[inline]
pub fn new(source: &'a str) -> Self {
let index = LineIndex::from_source_text(source);
Self { source, index }
}
/// Resolves the byte `offset` into a `SourceLocation` by querying a
/// `SourceCode` view over the stored text and line index.
///
/// NOTE(review): the receiver is `&mut self`, yet nothing in this body
/// mutates the locator — confirm whether the exclusive borrow is intended.
pub fn locate(&mut self, offset: TextSize) -> SourceLocation {
// Cheap view that combines the borrowed text with the prebuilt index.
let code = SourceCode::new(self.source, &self.index);
// SAFETY: NOTE(review) — the target type is inferred from the parameter of
// `source_location`; this presumably reinterprets this crate's `TextSize` as
// the equivalent type used by `ruff_python_ast`. Soundness relies on both
// types having identical size and layout — TODO confirm, and replace with a
// safe conversion once the dependency is unified.
let offset = unsafe { std::mem::transmute(offset) }; // temp code to fix text_size dependency
code.source_location(offset)
}
}

View file

@ -1,127 +0,0 @@
/// Source code location.
///
/// A row/column pair stored as two `u32` values. Because the comparison
/// traits are derived, ordering compares `row` first and then `column`.
/// Serialization support is only derived when the `serde` feature is enabled.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Location {
/// Row (line) component; the default location starts at row 1.
pub(super) row: u32,
/// Column component; the default location starts at column 0.
pub(super) column: u32,
}
impl Default for Location {
fn default() -> Self {
Self { row: 1, column: 0 }
}
}
impl Location {
    /// Writes `e` followed by this location into `f`.
    ///
    /// The output has the shape `<e> at line <row> column <column>`, using the
    /// values returned by `row()` and `column()`.
    ///
    /// # Errors
    /// Propagates any error reported by the underlying writer.
    pub fn fmt_with(
        &self,
        f: &mut impl std::fmt::Write,
        e: &impl std::fmt::Display,
    ) -> std::fmt::Result {
        let line = self.row();
        let col = self.column();
        write!(f, "{} at line {} column {}", e, line, col)
    }
}
impl Location {
/// Creates a new Location object at the given row and column.
///
/// # Example
/// ```
/// use rustpython_compiler_core::Location;
/// let loc = Location::new(10, 10);
/// ```
pub fn new(row: usize, column: usize) -> Self {
let row = row.try_into().expect("Location::row over u32");
let column = column.try_into().expect("Location::column over u32");
Location { row, column }
}
/// Current row
pub fn row(&self) -> usize {
self.row as usize
}
/// Current column
pub fn column(&self) -> usize {
self.column as usize
}
pub fn reset(&mut self) {
self.row = 1;
self.column = 0;
}
pub fn go_right(&mut self) {
self.column += 1;
}
pub fn go_left(&mut self) {
self.column -= 1;
}
pub fn newline(&mut self) {
self.row += 1;
self.column = 0;
}
pub fn with_col_offset<T: TryInto<isize>>(&self, offset: T) -> Self
where
<T as TryInto<isize>>::Error: std::fmt::Debug,
{
let column = (self.column as isize
+ offset
.try_into()
.expect("offset should be able to convert to isize")) as u32;
Self {
row: self.row,
column,
}
}
pub fn with_row_offset<T: TryInto<isize>>(&self, offset: T) -> Self
where
<T as TryInto<isize>>::Error: std::fmt::Debug,
{
let row = (self.row as isize
+ offset
.try_into()
.expect("offset should be able to convert to isize")) as u32;
Self {
row,
column: self.column,
}
}
}
/// A half-open range (`start..end`) delimited by two [`Location`]s.
pub type LocationRange = std::ops::Range<Location>;
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for `Location::new` to keep the tables below compact.
    fn at(row: usize, column: usize) -> Location {
        Location::new(row, column)
    }

    /// A location compares greater when its row is larger, or when rows tie
    /// and its column is larger.
    #[test]
    fn test_gt() {
        let cases = [
            (at(1, 2), at(1, 1)),
            (at(2, 1), at(1, 1)),
            (at(2, 1), at(1, 2)),
        ];
        for (greater, lesser) in cases {
            assert!(greater > lesser);
        }
    }

    /// Mirror of `test_gt` using the `<` operator.
    #[test]
    fn test_lt() {
        let cases = [
            (at(1, 1), at(1, 2)),
            (at(1, 1), at(2, 1)),
            (at(1, 2), at(2, 1)),
        ];
        for (lesser, greater) in cases {
            assert!(lesser < greater);
        }
    }

    /// Column offsets may be positive or negative and leave the row alone.
    #[test]
    fn test_with_col_offset() {
        let origin = at(1, 1);
        assert_eq!(origin.with_col_offset(1), at(1, 2));
        assert_eq!(origin.with_col_offset(-1), at(1, 0));
    }

    /// Row offsets may be positive or negative and leave the column alone.
    #[test]
    fn test_with_row_offset() {
        let origin = at(1, 1);
        assert_eq!(origin.with_row_offset(1), at(2, 1));
        assert_eq!(origin.with_row_offset(-1), at(0, 1));
    }
}

View file

@ -4,7 +4,10 @@ use std::convert::Infallible;
use num_bigint::{BigInt, Sign};
use num_complex::Complex64;
use crate::{bytecode::*, Location};
use crate::{
bytecode::*,
source_code::{OneIndexed, SourceLocation},
};
pub const FORMAT_VERSION: u32 = 4;
@ -16,6 +19,8 @@ pub enum MarshalError {
InvalidBytecode,
/// Invalid utf8 in string
InvalidUtf8,
/// Invalid source location
InvalidLocation,
/// Bad type marker
BadType,
}
@ -26,6 +31,7 @@ impl fmt::Display for MarshalError {
Self::Eof => f.write_str("unexpected end of data"),
Self::InvalidBytecode => f.write_str("invalid bytecode"),
Self::InvalidUtf8 => f.write_str("invalid utf8"),
Self::InvalidLocation => f.write_str("invalid source location"),
Self::BadType => f.write_str("bad type marker"),
}
}
@ -183,12 +189,12 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
let len = rdr.read_u32()?;
let locations = (0..len)
.map(|_| {
Ok(Location {
row: rdr.read_u32()?,
column: rdr.read_u32()?,
Ok(SourceLocation {
row: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
column: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
})
})
.collect::<Result<Box<[Location]>>>()?;
.collect::<Result<Box<[SourceLocation]>>>()?;
let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?);
@ -199,7 +205,8 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
let len = rdr.read_u32()?;
let source_path = bag.make_name(rdr.read_str(len)?);
let first_line_number = rdr.read_u32()?;
let first_line_number =
OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?;
let max_stackdepth = rdr.read_u32()?;
let len = rdr.read_u32()?;
@ -586,8 +593,8 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
write_len(buf, code.locations.len());
for loc in &*code.locations {
buf.write_u32(loc.row);
buf.write_u32(loc.column);
buf.write_u32(loc.row.get() as _);
buf.write_u32(loc.column.get() as _);
}
buf.write_u16(code.flags.bits());
@ -598,7 +605,7 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
write_vec(buf, code.source_path.as_ref().as_bytes());
buf.write_u32(code.first_line_number);
buf.write_u32(code.first_line_number.get());
buf.write_u32(code.max_stackdepth);
write_vec(buf, code.obj_name.as_ref().as_bytes());