numerous refactoring

- Split parser core and compiler core. Fix #14
- AST int type to `u32`
- Updated asdl_rs.py and update_asdl.sh fix #6
- Use `ruff_python_ast::SourceLocation` for Python source location. Deleted our own Location.
- Renamed ast::Located to ast::Attributed to distinguish terms for TextSize and SourceLocation
- `ast::<Node>`s for TextSize located ast. `ast::located::<Node>` for Python source located ast.
- And also strictly renaming `located` to refer only python location related interfaces.
- `SourceLocator` to convert locations.
- New `source-code` features of to disable python locations when unnecessary.
- Also including fully merging https://github.com/astral-sh/RustPython/pull/4 closes #9
This commit is contained in:
Jeong YunWon 2023-05-10 02:36:52 +09:00
parent 09a6afdd04
commit a3d9d8cb14
29 changed files with 9737 additions and 12000 deletions

View file

@ -1,6 +1,6 @@
[package]
name = "rustpython-compiler-core"
description = "RustPython specific bytecode."
name = "rustpython-parser-core"
description = "RustPython parser data types."
version = "0.2.0"
authors = ["RustPython Team"]
edition = "2021"
@ -8,7 +8,6 @@ repository = "https://github.com/RustPython/RustPython"
license = "MIT"
[dependencies]
bitflags = { workspace = true }
itertools = { workspace = true }
num-bigint = { workspace = true }
num-complex = { workspace = true }
@ -18,3 +17,6 @@ ruff_python_ast = { workspace = true }
lz4_flex = "0.9.2"
[features]
default = ["source-code"]
source-code = []

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
use crate::{source_code::SourceLocation, text_size::TextSize};
use crate::text_size::TextSize;
use std::fmt::Display;
#[derive(Debug, PartialEq, Eq)]
@ -61,79 +61,4 @@ impl<T> BaseError<T> {
{
BaseError::from(self)
}
pub fn into_located<U>(self, locator: &mut super::SourceLocator) -> LocatedError<U>
where
T: Into<U>,
{
let location = locator.locate(self.offset);
LocatedError {
error: self.error.into(),
location: Some(location),
source_path: self.source_path,
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedError<T> {
pub error: T,
pub location: Option<SourceLocation>,
pub source_path: String,
}
impl<T> LocatedError<T> {
pub fn error(self) -> T {
self.error
}
pub fn from<U>(obj: LocatedError<U>) -> Self
where
U: Into<T>,
{
Self {
error: obj.error.into(),
location: obj.location,
source_path: obj.source_path,
}
}
pub fn into<U>(self) -> LocatedError<U>
where
T: Into<U>,
{
LocatedError::from(self)
}
pub fn python_location(&self) -> (usize, usize) {
if let Some(location) = self.location {
(
location.row.to_one_indexed(),
location.column.to_one_indexed(),
)
} else {
(0, 0)
}
}
}
impl<T> Display for LocatedError<T>
where
T: std::fmt::Display,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let (row, column) = self.location.map_or((0, 0), |l| {
(l.row.to_one_indexed(), l.column.to_one_indexed())
});
write!(f, "{} at row {} col {}", &self.error, row, column,)
}
}
impl<T> std::error::Error for LocatedError<T>
where
T: std::error::Error + 'static,
{
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
Some(&self.error)
}
}

13
core/src/format.rs Normal file
View file

@ -0,0 +1,13 @@
/// Transforms a value prior to formatting it.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum ConversionFlag {
/// No conversion
None = 0, // CPython uses -1 but not pleasure for us
/// Converts by calling `str(<value>)`.
Str = b's',
/// Converts by calling `ascii(<value>)`.
Ascii = b'a',
/// Converts by calling `repr(<value>)`.
Repr = b'r',
}

View file

@ -1,41 +1,15 @@
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-compiler-core/")]
#![doc(html_root_url = "https://docs.rs/rustpython-parser-core/")]
// parser core
mod error;
mod mode;
mod format;
pub mod mode;
#[cfg(feature = "source-code")]
pub mod source_code;
pub use error::BaseError;
pub use format::ConversionFlag;
pub use mode::Mode;
pub use ruff_text_size as text_size; // re-export mandatory and frequently accessed dependency
// compiler core
mod bytecode;
pub mod marshal;
pub use bytecode::*;
pub use error::LocatedError;
pub use ruff_python_ast::source_code;
pub use ruff_python_ast::source_code::OneIndexed as LineNumber;
use source_code::{LineIndex, SourceCode, SourceLocation};
use text_size::TextSize;
/// Converts source code byte-offset to Python convention line and column numbers.
pub struct SourceLocator<'a> {
pub source: &'a str,
index: LineIndex,
}
impl<'a> SourceLocator<'a> {
#[inline]
pub fn new(source: &'a str) -> Self {
let index = LineIndex::from_source_text(source);
Self { source, index }
}
pub fn locate(&mut self, offset: TextSize) -> SourceLocation {
let code = SourceCode::new(self.source, &self.index);
let offset = unsafe { std::mem::transmute(offset) }; // temp code to fix text_size dependency
code.source_location(offset)
}
}
// re-export our public interface
pub use ruff_text_size as text_size;

View file

@ -1,635 +0,0 @@
use core::fmt;
use std::convert::Infallible;
use num_bigint::{BigInt, Sign};
use num_complex::Complex64;
use crate::{
bytecode::*,
source_code::{OneIndexed, SourceLocation},
};
pub const FORMAT_VERSION: u32 = 4;
#[derive(Debug)]
pub enum MarshalError {
/// Unexpected End Of File
Eof,
/// Invalid Bytecode
InvalidBytecode,
/// Invalid utf8 in string
InvalidUtf8,
/// Invalid source location
InvalidLocation,
/// Bad type marker
BadType,
}
impl fmt::Display for MarshalError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Eof => f.write_str("unexpected end of data"),
Self::InvalidBytecode => f.write_str("invalid bytecode"),
Self::InvalidUtf8 => f.write_str("invalid utf8"),
Self::InvalidLocation => f.write_str("invalid source location"),
Self::BadType => f.write_str("bad type marker"),
}
}
}
impl From<std::str::Utf8Error> for MarshalError {
fn from(_: std::str::Utf8Error) -> Self {
Self::InvalidUtf8
}
}
impl std::error::Error for MarshalError {}
type Result<T, E = MarshalError> = std::result::Result<T, E>;
#[repr(u8)]
enum Type {
// Null = b'0',
None = b'N',
False = b'F',
True = b'T',
StopIter = b'S',
Ellipsis = b'.',
Int = b'i',
Float = b'g',
Complex = b'y',
// Long = b'l', // i32
Bytes = b's', // = TYPE_STRING
// Interned = b't',
// Ref = b'r',
Tuple = b'(',
List = b'[',
Dict = b'{',
Code = b'c',
Unicode = b'u',
// Unknown = b'?',
Set = b'<',
FrozenSet = b'>',
Ascii = b'a',
// AsciiInterned = b'A',
// SmallTuple = b')',
// ShortAscii = b'z',
// ShortAsciiInterned = b'Z',
}
// const FLAG_REF: u8 = b'\x80';
impl TryFrom<u8> for Type {
type Error = MarshalError;
fn try_from(value: u8) -> Result<Self> {
use Type::*;
Ok(match value {
// b'0' => Null,
b'N' => None,
b'F' => False,
b'T' => True,
b'S' => StopIter,
b'.' => Ellipsis,
b'i' => Int,
b'g' => Float,
b'y' => Complex,
// b'l' => Long,
b's' => Bytes,
// b't' => Interned,
// b'r' => Ref,
b'(' => Tuple,
b'[' => List,
b'{' => Dict,
b'c' => Code,
b'u' => Unicode,
// b'?' => Unknown,
b'<' => Set,
b'>' => FrozenSet,
b'a' => Ascii,
// b'A' => AsciiInterned,
// b')' => SmallTuple,
// b'z' => ShortAscii,
// b'Z' => ShortAsciiInterned,
_ => return Err(MarshalError::BadType),
})
}
}
pub trait Read {
fn read_slice(&mut self, n: u32) -> Result<&[u8]>;
fn read_array<const N: usize>(&mut self) -> Result<&[u8; N]> {
self.read_slice(N as u32).map(|s| s.try_into().unwrap())
}
fn read_str(&mut self, len: u32) -> Result<&str> {
Ok(std::str::from_utf8(self.read_slice(len)?)?)
}
fn read_u8(&mut self) -> Result<u8> {
Ok(u8::from_le_bytes(*self.read_array()?))
}
fn read_u16(&mut self) -> Result<u16> {
Ok(u16::from_le_bytes(*self.read_array()?))
}
fn read_u32(&mut self) -> Result<u32> {
Ok(u32::from_le_bytes(*self.read_array()?))
}
fn read_u64(&mut self) -> Result<u64> {
Ok(u64::from_le_bytes(*self.read_array()?))
}
}
pub(crate) trait ReadBorrowed<'a>: Read {
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
}
}
impl Read for &[u8] {
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
self.read_slice_borrow(n)
}
}
impl<'a> ReadBorrowed<'a> for &'a [u8] {
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
*self = &self[n as usize..];
Ok(data)
}
}
pub struct Cursor<B> {
pub data: B,
pub position: usize,
}
impl<B: AsRef<[u8]>> Read for Cursor<B> {
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
let data = &self.data.as_ref()[self.position..];
let slice = data.get(..n as usize).ok_or(MarshalError::Eof)?;
self.position += n as usize;
Ok(slice)
}
}
pub fn deserialize_code<R: Read, Bag: ConstantBag>(
rdr: &mut R,
bag: Bag,
) -> Result<CodeObject<Bag::Constant>> {
let len = rdr.read_u32()?;
let instructions = rdr.read_slice(len * 2)?;
let instructions = instructions
.chunks_exact(2)
.map(|cu| {
let op = Instruction::try_from(cu[0])?;
let arg = OpArgByte(cu[1]);
Ok(CodeUnit { op, arg })
})
.collect::<Result<Box<[CodeUnit]>>>()?;
let len = rdr.read_u32()?;
let locations = (0..len)
.map(|_| {
Ok(SourceLocation {
row: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
column: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
})
})
.collect::<Result<Box<[SourceLocation]>>>()?;
let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?);
let posonlyarg_count = rdr.read_u32()?;
let arg_count = rdr.read_u32()?;
let kwonlyarg_count = rdr.read_u32()?;
let len = rdr.read_u32()?;
let source_path = bag.make_name(rdr.read_str(len)?);
let first_line_number =
OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?;
let max_stackdepth = rdr.read_u32()?;
let len = rdr.read_u32()?;
let obj_name = bag.make_name(rdr.read_str(len)?);
let len = rdr.read_u32()?;
let cell2arg = (len != 0)
.then(|| {
(0..len)
.map(|_| Ok(rdr.read_u32()? as i32))
.collect::<Result<Box<[i32]>>>()
})
.transpose()?;
let len = rdr.read_u32()?;
let constants = (0..len)
.map(|_| deserialize_value(rdr, bag))
.collect::<Result<Box<[_]>>>()?;
let mut read_names = || {
let len = rdr.read_u32()?;
(0..len)
.map(|_| {
let len = rdr.read_u32()?;
Ok(bag.make_name(rdr.read_str(len)?))
})
.collect::<Result<Box<[_]>>>()
};
let names = read_names()?;
let varnames = read_names()?;
let cellvars = read_names()?;
let freevars = read_names()?;
Ok(CodeObject {
instructions,
locations,
flags,
posonlyarg_count,
arg_count,
kwonlyarg_count,
source_path,
first_line_number,
max_stackdepth,
obj_name,
cell2arg,
constants,
names,
varnames,
cellvars,
freevars,
})
}
pub trait MarshalBag: Copy {
type Value;
fn make_bool(&self, value: bool) -> Self::Value;
fn make_none(&self) -> Self::Value;
fn make_ellipsis(&self) -> Self::Value;
fn make_float(&self, value: f64) -> Self::Value;
fn make_complex(&self, value: Complex64) -> Self::Value;
fn make_str(&self, value: &str) -> Self::Value;
fn make_bytes(&self, value: &[u8]) -> Self::Value;
fn make_int(&self, value: BigInt) -> Self::Value;
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value;
fn make_code(
&self,
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
) -> Self::Value;
fn make_stop_iter(&self) -> Result<Self::Value>;
fn make_list(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
fn make_set(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
fn make_frozenset(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
fn make_dict(
&self,
it: impl Iterator<Item = (Self::Value, Self::Value)>,
) -> Result<Self::Value>;
type ConstantBag: ConstantBag;
fn constant_bag(self) -> Self::ConstantBag;
}
impl<Bag: ConstantBag> MarshalBag for Bag {
type Value = Bag::Constant;
fn make_bool(&self, value: bool) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Boolean { value })
}
fn make_none(&self) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::None)
}
fn make_ellipsis(&self) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Ellipsis)
}
fn make_float(&self, value: f64) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Float { value })
}
fn make_complex(&self, value: Complex64) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Complex { value })
}
fn make_str(&self, value: &str) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Str { value })
}
fn make_bytes(&self, value: &[u8]) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Bytes { value })
}
fn make_int(&self, value: BigInt) -> Self::Value {
self.make_int(value)
}
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value {
self.make_tuple(elements)
}
fn make_code(
&self,
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
) -> Self::Value {
self.make_code(code)
}
fn make_stop_iter(&self) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_list(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_set(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_frozenset(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_dict(
&self,
_: impl Iterator<Item = (Self::Value, Self::Value)>,
) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
type ConstantBag = Self;
fn constant_bag(self) -> Self::ConstantBag {
self
}
}
pub fn deserialize_value<R: Read, Bag: MarshalBag>(rdr: &mut R, bag: Bag) -> Result<Bag::Value> {
let typ = Type::try_from(rdr.read_u8()?)?;
let value = match typ {
Type::True => bag.make_bool(true),
Type::False => bag.make_bool(false),
Type::None => bag.make_none(),
Type::StopIter => bag.make_stop_iter()?,
Type::Ellipsis => bag.make_ellipsis(),
Type::Int => {
let len = rdr.read_u32()? as i32;
let sign = if len < 0 { Sign::Minus } else { Sign::Plus };
let bytes = rdr.read_slice(len.unsigned_abs())?;
let int = BigInt::from_bytes_le(sign, bytes);
bag.make_int(int)
}
Type::Float => {
let value = f64::from_bits(rdr.read_u64()?);
bag.make_float(value)
}
Type::Complex => {
let re = f64::from_bits(rdr.read_u64()?);
let im = f64::from_bits(rdr.read_u64()?);
let value = Complex64 { re, im };
bag.make_complex(value)
}
Type::Ascii | Type::Unicode => {
let len = rdr.read_u32()?;
let value = rdr.read_str(len)?;
bag.make_str(value)
}
Type::Tuple => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_tuple(it))?
}
Type::List => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_list(it))??
}
Type::Set => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_set(it))??
}
Type::FrozenSet => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_frozenset(it))??
}
Type::Dict => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| {
let k = deserialize_value(rdr, bag)?;
let v = deserialize_value(rdr, bag)?;
Ok::<_, MarshalError>((k, v))
});
itertools::process_results(it, |it| bag.make_dict(it))??
}
Type::Bytes => {
// Following CPython, after marshaling, byte arrays are converted into bytes.
let len = rdr.read_u32()?;
let value = rdr.read_slice(len)?;
bag.make_bytes(value)
}
Type::Code => bag.make_code(deserialize_code(rdr, bag.constant_bag())?),
};
Ok(value)
}
pub trait Dumpable: Sized {
type Error;
type Constant: Constant;
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error>;
}
pub enum DumpableValue<'a, D: Dumpable> {
Integer(&'a BigInt),
Float(f64),
Complex(Complex64),
Boolean(bool),
Str(&'a str),
Bytes(&'a [u8]),
Code(&'a CodeObject<D::Constant>),
Tuple(&'a [D]),
None,
Ellipsis,
StopIter,
List(&'a [D]),
Set(&'a [D]),
Frozenset(&'a [D]),
Dict(&'a [(D, D)]),
}
impl<'a, C: Constant> From<BorrowedConstant<'a, C>> for DumpableValue<'a, C> {
fn from(c: BorrowedConstant<'a, C>) -> Self {
match c {
BorrowedConstant::Integer { value } => Self::Integer(value),
BorrowedConstant::Float { value } => Self::Float(value),
BorrowedConstant::Complex { value } => Self::Complex(value),
BorrowedConstant::Boolean { value } => Self::Boolean(value),
BorrowedConstant::Str { value } => Self::Str(value),
BorrowedConstant::Bytes { value } => Self::Bytes(value),
BorrowedConstant::Code { code } => Self::Code(code),
BorrowedConstant::Tuple { elements } => Self::Tuple(elements),
BorrowedConstant::None => Self::None,
BorrowedConstant::Ellipsis => Self::Ellipsis,
}
}
}
impl<C: Constant> Dumpable for C {
type Error = Infallible;
type Constant = Self;
#[inline(always)]
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error> {
Ok(f(self.borrow_constant().into()))
}
}
pub trait Write {
fn write_slice(&mut self, slice: &[u8]);
fn write_u8(&mut self, v: u8) {
self.write_slice(&v.to_le_bytes())
}
fn write_u16(&mut self, v: u16) {
self.write_slice(&v.to_le_bytes())
}
fn write_u32(&mut self, v: u32) {
self.write_slice(&v.to_le_bytes())
}
fn write_u64(&mut self, v: u64) {
self.write_slice(&v.to_le_bytes())
}
}
impl Write for Vec<u8> {
fn write_slice(&mut self, slice: &[u8]) {
self.extend_from_slice(slice)
}
}
pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
let Ok(len) = len.try_into() else { panic!("too long to serialize") };
buf.write_u32(len);
}
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
write_len(buf, slice.len());
buf.write_slice(slice);
}
pub fn serialize_value<W: Write, D: Dumpable>(
buf: &mut W,
constant: DumpableValue<'_, D>,
) -> Result<(), D::Error> {
match constant {
DumpableValue::Integer(int) => {
buf.write_u8(Type::Int as u8);
let (sign, bytes) = int.to_bytes_le();
let len: i32 = bytes.len().try_into().expect("too long to serialize");
let len = if sign == Sign::Minus { -len } else { len };
buf.write_u32(len as u32);
buf.write_slice(&bytes);
}
DumpableValue::Float(f) => {
buf.write_u8(Type::Float as u8);
buf.write_u64(f.to_bits());
}
DumpableValue::Complex(c) => {
buf.write_u8(Type::Complex as u8);
buf.write_u64(c.re.to_bits());
buf.write_u64(c.im.to_bits());
}
DumpableValue::Boolean(b) => {
buf.write_u8(if b { Type::True } else { Type::False } as u8);
}
DumpableValue::Str(s) => {
buf.write_u8(Type::Unicode as u8);
write_vec(buf, s.as_bytes());
}
DumpableValue::Bytes(b) => {
buf.write_u8(Type::Bytes as u8);
write_vec(buf, b);
}
DumpableValue::Code(c) => {
buf.write_u8(Type::Code as u8);
serialize_code(buf, c);
}
DumpableValue::Tuple(tup) => {
buf.write_u8(Type::Tuple as u8);
write_len(buf, tup.len());
for val in tup {
val.with_dump(|val| serialize_value(buf, val))??
}
}
DumpableValue::None => {
buf.write_u8(Type::None as u8);
}
DumpableValue::Ellipsis => {
buf.write_u8(Type::Ellipsis as u8);
}
DumpableValue::StopIter => {
buf.write_u8(Type::StopIter as u8);
}
DumpableValue::List(l) => {
buf.write_u8(Type::List as u8);
write_len(buf, l.len());
for val in l {
val.with_dump(|val| serialize_value(buf, val))??
}
}
DumpableValue::Set(set) => {
buf.write_u8(Type::Set as u8);
write_len(buf, set.len());
for val in set {
val.with_dump(|val| serialize_value(buf, val))??
}
}
DumpableValue::Frozenset(set) => {
buf.write_u8(Type::FrozenSet as u8);
write_len(buf, set.len());
for val in set {
val.with_dump(|val| serialize_value(buf, val))??
}
}
DumpableValue::Dict(d) => {
buf.write_u8(Type::Dict as u8);
write_len(buf, d.len());
for (k, v) in d {
k.with_dump(|val| serialize_value(buf, val))??;
v.with_dump(|val| serialize_value(buf, val))??;
}
}
}
Ok(())
}
pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
write_len(buf, code.instructions.len());
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
buf.write_slice(instructions_bytes);
write_len(buf, code.locations.len());
for loc in &*code.locations {
buf.write_u32(loc.row.get() as _);
buf.write_u32(loc.column.get() as _);
}
buf.write_u16(code.flags.bits());
buf.write_u32(code.posonlyarg_count);
buf.write_u32(code.arg_count);
buf.write_u32(code.kwonlyarg_count);
write_vec(buf, code.source_path.as_ref().as_bytes());
buf.write_u32(code.first_line_number.get());
buf.write_u32(code.max_stackdepth);
write_vec(buf, code.obj_name.as_ref().as_bytes());
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
write_len(buf, cell2arg.len());
for &i in cell2arg {
buf.write_u32(i as u32)
}
write_len(buf, code.constants.len());
for constant in &*code.constants {
serialize_value(buf, constant.borrow_constant().into()).unwrap_or_else(|x| match x {})
}
let mut write_names = |names: &[C::Name]| {
write_len(buf, names.len());
for name in names {
write_vec(buf, name.as_ref().as_bytes());
}
};
write_names(&code.names);
write_names(&code.varnames);
write_names(&code.cellvars);
write_names(&code.freevars);
}

View file

@ -1,27 +1,30 @@
//! Control in the different modes by which a source file can be parsed.
/// The mode argument specifies in what way code must be parsed.
#[derive(Clone, Copy)]
pub enum Mode {
Exec,
Eval,
Single,
BlockExpr,
/// The code consists of a sequence of statements.
Module,
/// The code consists of a sequence of interactive statement.
Interactive,
/// The code consists of a single expression.
Expression,
}
impl std::str::FromStr for Mode {
type Err = ModeParseError;
// To support `builtins.compile()` `mode` argument
fn from_str(s: &str) -> Result<Self, ModeParseError> {
match s {
"exec" => Ok(Mode::Exec),
"eval" => Ok(Mode::Eval),
"single" => Ok(Mode::Single),
_ => Err(ModeParseError(())),
"exec" | "single" => Ok(Mode::Module),
"eval" => Ok(Mode::Expression),
_ => Err(ModeParseError),
}
}
}
/// Returned when a given mode is not valid.
#[derive(Debug)]
pub struct ModeParseError(());
pub struct ModeParseError;
impl std::fmt::Display for ModeParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {

126
core/src/source_code.rs Normal file
View file

@ -0,0 +1,126 @@
// re-export our public interface
pub use ruff_python_ast::source_code::*;
pub type LineNumber = ruff_python_ast::source_code::OneIndexed;
#[derive(Debug)]
pub struct SourceRange {
pub start: SourceLocation,
pub end: Option<SourceLocation>,
}
impl SourceRange {
pub fn new(start: SourceLocation, end: SourceLocation) -> Self {
Self {
start,
end: Some(end),
}
}
pub fn unwrap_end(&self) -> SourceLocation {
self.end.unwrap()
}
}
impl From<std::ops::Range<SourceLocation>> for SourceRange {
fn from(value: std::ops::Range<SourceLocation>) -> Self {
Self {
start: value.start,
end: Some(value.end),
}
}
}
/// Converts source code byte-offset to Python convention line and column numbers.
pub struct SourceLocator<'a> {
pub source: &'a str,
index: LineIndex,
}
impl<'a> SourceLocator<'a> {
#[inline]
pub fn new(source: &'a str) -> Self {
let index = LineIndex::from_source_text(source);
Self { source, index }
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode::new(self.source, &self.index)
}
pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
let offset = offset.to_u32().into();
self.to_source_code().source_location(offset)
}
pub fn locate_error<T, U>(&mut self, base: crate::error::BaseError<T>) -> LocatedError<U>
where
T: Into<U>,
{
let location = self.locate(base.offset);
LocatedError {
error: base.error.into(),
location: Some(location),
source_path: base.source_path,
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedError<T> {
pub error: T,
pub location: Option<SourceLocation>,
pub source_path: String,
}
impl<T> LocatedError<T> {
pub fn error(self) -> T {
self.error
}
pub fn from<U>(obj: LocatedError<U>) -> Self
where
U: Into<T>,
{
Self {
error: obj.error.into(),
location: obj.location,
source_path: obj.source_path,
}
}
pub fn into<U>(self) -> LocatedError<U>
where
T: Into<U>,
{
LocatedError::from(self)
}
pub fn python_location(&self) -> (usize, usize) {
if let Some(location) = self.location {
(location.row.to_usize(), location.column.to_usize())
} else {
(0, 0)
}
}
}
impl<T> std::fmt::Display for LocatedError<T>
where
T: std::fmt::Display,
{
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let (row, column) = self
.location
.map_or((0, 0), |l| (l.row.to_usize(), l.column.to_usize()));
write!(f, "{} at row {} col {}", &self.error, row, column,)
}
}
impl<T> std::error::Error for LocatedError<T>
where
T: std::error::Error + 'static,
{
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
Some(&self.error)
}
}