mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-07 21:25:31 +00:00
numerous refactoring
- Split parser core and compiler core. Fix #14 - Changed AST int type to `u32` - Updated asdl_rs.py and update_asdl.sh, fix #6 - Use `ruff_python_ast::SourceLocation` for Python source location. Deleted our own Location. - Renamed ast::Located to ast::Attributed to distinguish terms for TextSize and SourceLocation - `ast::<Node>`s for TextSize located ast. `ast::located::<Node>` for Python source located ast. - Also strictly renamed `located` to refer only to Python-location-related interfaces. - `SourceLocator` to convert locations. - New `source-code` feature to disable Python locations when unnecessary. - Also includes a full merge of https://github.com/astral-sh/RustPython/pull/4 closes #9
This commit is contained in:
parent
09a6afdd04
commit
a3d9d8cb14
29 changed files with 9737 additions and 12000 deletions
|
@ -16,11 +16,12 @@ members = [
|
|||
]
|
||||
|
||||
[workspace.dependencies]
|
||||
rustpython-ast = { path = "ast", version = "0.2.0" }
|
||||
rustpython-parser-core = { path = "core", version = "0.2.0" }
|
||||
rustpython-literal = { path = "literal", version = "0.2.0" }
|
||||
|
||||
ahash = "0.7.6"
|
||||
anyhow = "1.0.45"
|
||||
ascii = "1.0"
|
||||
bitflags = "1.3.2"
|
||||
bstr = "0.2.17"
|
||||
cfg-if = "1.0"
|
||||
insta = "1.14.0"
|
||||
itertools = "0.10.3"
|
||||
|
@ -32,7 +33,7 @@ rand = "0.8.5"
|
|||
serde = "1.0"
|
||||
static_assertions = "1.1"
|
||||
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
|
||||
ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "583df5c1fa43b2732896219f8ab425116c140c80" }
|
||||
ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "088958e8fda2f74f1ebf315c75db13c232409b13" }
|
||||
# ruff_python_ast = { path = "../ruff/crates/ruff_python_ast" }
|
||||
|
||||
[profile.dev.package."*"]
|
||||
|
|
|
@ -8,14 +8,14 @@ repository = "https://github.com/RustPython/RustPython"
|
|||
license = "MIT"
|
||||
|
||||
[features]
|
||||
default = ["constant-optimization", "fold", "location"]
|
||||
default = ["constant-optimization", "fold", "source-code"]
|
||||
constant-optimization = ["fold"]
|
||||
location = []
|
||||
source-code = ["fold"]
|
||||
fold = []
|
||||
unparse = ["rustpython-literal"]
|
||||
|
||||
[dependencies]
|
||||
rustpython-compiler-core = { path = "../core", version = "0.2.0" }
|
||||
rustpython-literal = { path = "../literal", version = "0.2.0", optional = true }
|
||||
rustpython-parser-core = { workspace = true }
|
||||
rustpython-literal = { workspace = true, optional = true }
|
||||
|
||||
num-bigint = { workspace = true }
|
||||
|
|
|
@ -8,7 +8,6 @@ import textwrap
|
|||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict
|
||||
from attr import dataclass
|
||||
|
||||
import asdl
|
||||
|
||||
|
@ -18,7 +17,7 @@ AUTOGEN_MESSAGE = "// File automatically generated by {}.\n"
|
|||
builtin_type_mapping = {
|
||||
"identifier": "Ident",
|
||||
"string": "String",
|
||||
"int": "usize",
|
||||
"int": "u32",
|
||||
"constant": "Constant",
|
||||
}
|
||||
assert builtin_type_mapping.keys() == asdl.builtin_types
|
||||
|
@ -391,7 +390,18 @@ class FoldTraitDefVisitor(EmitVisitor):
|
|||
depth + 1,
|
||||
)
|
||||
self.emit(
|
||||
"fn map_located<T>(&mut self, located: Attributed<T, U>) -> Result<Attributed<T, Self::TargetU>, Self::Error> { let custom = self.map_user(located.custom)?; Ok(Attributed { range: located.range, custom, node: located.node }) }",
|
||||
"""
|
||||
fn map_located<T>(&mut self, located: Attributed<T, U>) -> Result<Attributed<T, Self::TargetU>, Self::Error> {
|
||||
let custom = self.map_user(located.custom)?;
|
||||
Ok(Attributed { range: located.range, custom, node: located.node })
|
||||
}""",
|
||||
depth + 1,
|
||||
)
|
||||
self.emit(
|
||||
"""
|
||||
fn fold<X: Foldable<U, Self::TargetU>>(&mut self, node: X) -> Result<X::Mapped, Self::Error> {
|
||||
node.fold(self)
|
||||
}""",
|
||||
depth + 1,
|
||||
)
|
||||
for dfn in mod.dfns:
|
||||
|
@ -715,8 +725,8 @@ class TraitImplVisitor(EmitVisitor):
|
|||
return ",".join(rust_field(f.name) for f in fields)
|
||||
|
||||
def gen_sum_fromobj(self, sum, sumname, enumname, rustname, depth):
|
||||
if sum.attributes:
|
||||
self.extract_location(sumname, depth)
|
||||
# if sum.attributes:
|
||||
# self.extract_location(sumname, depth)
|
||||
|
||||
self.emit("let _cls = _object.class();", depth)
|
||||
self.emit("Ok(", depth)
|
||||
|
@ -739,8 +749,8 @@ class TraitImplVisitor(EmitVisitor):
|
|||
self.emit("})", depth)
|
||||
|
||||
def gen_product_fromobj(self, product, prodname, structname, depth):
|
||||
if product.attributes:
|
||||
self.extract_location(prodname, depth)
|
||||
# if product.attributes:
|
||||
# self.extract_location(prodname, depth)
|
||||
|
||||
self.emit("Ok(", depth)
|
||||
self.gen_construction(structname, product, prodname, depth + 1)
|
||||
|
@ -761,11 +771,15 @@ class TraitImplVisitor(EmitVisitor):
|
|||
def extract_location(self, typename, depth):
|
||||
row = self.decode_field(asdl.Field("int", "lineno"), typename)
|
||||
column = self.decode_field(asdl.Field("int", "col_offset"), typename)
|
||||
self.emit(f"""let _location = {{
|
||||
let row = try_location_field({row}, _vm)?;
|
||||
let column = try_location_field({column}, _vm)?;
|
||||
SourceLocation {{ row, column }}
|
||||
}};""", depth)
|
||||
self.emit(
|
||||
f"""
|
||||
let _location = {{
|
||||
let row = {row};
|
||||
let column = {column};
|
||||
try_location(row, column)
|
||||
}};""",
|
||||
depth,
|
||||
)
|
||||
|
||||
def decode_field(self, field, typename):
|
||||
name = json.dumps(field.name)
|
||||
|
@ -805,7 +819,7 @@ def write_located_def(typeinfo, f):
|
|||
f.write(
|
||||
textwrap.dedent(
|
||||
"""
|
||||
use crate::location::SourceRange;
|
||||
use rustpython_parser_core::source_code::SourceRange;
|
||||
|
||||
pub type Located<T> = super::generic::Attributed<T, SourceRange>;
|
||||
"""
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
use crate::location::{SourceLocation, SourceRange};
|
||||
use rustpython_compiler_core::text_size::{TextRange, TextSize};
|
||||
use rustpython_parser_core::{
|
||||
source_code::{SourceLocation, SourceRange},
|
||||
text_size::{TextRange, TextSize},
|
||||
};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Attributed<T, U = ()> {
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use num_bigint::BigInt;
|
||||
pub use rustpython_compiler_core::ConversionFlag;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Constant {
|
||||
|
@ -137,7 +136,7 @@ impl<U> crate::fold::Fold<U> for ConstantOptimizer {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use rustpython_compiler_core::text_size::TextRange;
|
||||
use rustpython_parser_core::text_size::TextRange;
|
||||
|
||||
#[cfg(feature = "constant-optimization")]
|
||||
#[test]
|
||||
|
|
|
@ -62,4 +62,4 @@ macro_rules! simple_fold {
|
|||
};
|
||||
}
|
||||
|
||||
simple_fold!(usize, String, bool, constant::Constant);
|
||||
simple_fold!(u32, String, bool, constant::Constant);
|
||||
|
|
|
@ -158,7 +158,7 @@ pub struct StmtAnnAssign<U = ()> {
|
|||
pub target: Box<Expr<U>>,
|
||||
pub annotation: Box<Expr<U>>,
|
||||
pub value: Option<Box<Expr<U>>>,
|
||||
pub simple: usize,
|
||||
pub simple: u32,
|
||||
}
|
||||
|
||||
impl<U> From<StmtAnnAssign<U>> for StmtKind<U> {
|
||||
|
@ -328,7 +328,7 @@ impl<U> From<StmtImport<U>> for StmtKind<U> {
|
|||
pub struct StmtImportFrom<U = ()> {
|
||||
pub module: Option<Ident>,
|
||||
pub names: Vec<Alias<U>>,
|
||||
pub level: Option<usize>,
|
||||
pub level: Option<u32>,
|
||||
}
|
||||
|
||||
impl<U> From<StmtImportFrom<U>> for StmtKind<U> {
|
||||
|
@ -610,7 +610,7 @@ impl<U> From<ExprCall<U>> for ExprKind<U> {
|
|||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct ExprFormattedValue<U = ()> {
|
||||
pub value: Box<Expr<U>>,
|
||||
pub conversion: usize,
|
||||
pub conversion: u32,
|
||||
pub format_spec: Option<Box<Expr<U>>>,
|
||||
}
|
||||
|
||||
|
@ -819,7 +819,7 @@ pub struct Comprehension<U = ()> {
|
|||
pub target: Expr<U>,
|
||||
pub iter: Expr<U>,
|
||||
pub ifs: Vec<Expr<U>>,
|
||||
pub is_async: usize,
|
||||
pub is_async: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
|
@ -996,7 +996,7 @@ pub type Pattern<U = ()> = Attributed<PatternKind<U>, U>;
|
|||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct TypeIgnoreTypeIgnore {
|
||||
pub lineno: usize,
|
||||
pub lineno: u32,
|
||||
pub tag: String,
|
||||
}
|
||||
|
||||
|
@ -1019,6 +1019,7 @@ pub mod fold {
|
|||
type TargetU;
|
||||
type Error;
|
||||
fn map_user(&mut self, user: U) -> Result<Self::TargetU, Self::Error>;
|
||||
|
||||
fn map_located<T>(
|
||||
&mut self,
|
||||
located: Attributed<T, U>,
|
||||
|
@ -1030,6 +1031,13 @@ pub mod fold {
|
|||
node: located.node,
|
||||
})
|
||||
}
|
||||
|
||||
fn fold<X: Foldable<U, Self::TargetU>>(
|
||||
&mut self,
|
||||
node: X,
|
||||
) -> Result<X::Mapped, Self::Error> {
|
||||
node.fold(self)
|
||||
}
|
||||
fn fold_mod(&mut self, node: Mod<U>) -> Result<Mod<Self::TargetU>, Self::Error> {
|
||||
fold_mod(self, node)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// File automatically generated by ast/asdl_rs.py.
|
||||
|
||||
use crate::location::SourceRange;
|
||||
use rustpython_parser_core::source_code::SourceRange;
|
||||
|
||||
pub type Located<T> = super::generic::Attributed<T, SourceRange>;
|
||||
pub type Mod = super::generic::Mod<SourceRange>;
|
||||
|
|
|
@ -7,53 +7,21 @@ mod generic {
|
|||
include!("gen/generic.rs");
|
||||
}
|
||||
mod impls;
|
||||
#[cfg(feature = "location")]
|
||||
pub mod located {
|
||||
include!("gen/located.rs");
|
||||
}
|
||||
#[cfg(feature = "location")]
|
||||
mod locator;
|
||||
#[cfg(feature = "location")]
|
||||
pub use crate::locator::locate;
|
||||
#[cfg(feature = "location")]
|
||||
pub use rustpython_compiler_core::SourceLocator;
|
||||
|
||||
#[cfg(feature = "source-code")]
|
||||
mod source_locator;
|
||||
#[cfg(feature = "unparse")]
|
||||
mod unparse;
|
||||
|
||||
pub use attributed::Attributed;
|
||||
pub use constant::{Constant, ConversionFlag};
|
||||
pub use constant::Constant;
|
||||
pub use generic::*;
|
||||
pub use rustpython_parser_core::{text_size, ConversionFlag};
|
||||
|
||||
pub type Suite<U = ()> = Vec<Stmt<U>>;
|
||||
|
||||
pub mod location {
|
||||
pub use rustpython_compiler_core::source_code::{OneIndexed, SourceLocation};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SourceRange {
|
||||
pub start: SourceLocation,
|
||||
pub end: Option<SourceLocation>,
|
||||
}
|
||||
|
||||
impl SourceRange {
|
||||
pub fn new(start: SourceLocation, end: SourceLocation) -> Self {
|
||||
Self {
|
||||
start,
|
||||
end: Some(end),
|
||||
}
|
||||
}
|
||||
pub fn unwrap_end(&self) -> SourceLocation {
|
||||
self.end.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::ops::Range<SourceLocation>> for SourceRange {
|
||||
fn from(value: std::ops::Range<SourceLocation>) -> Self {
|
||||
Self {
|
||||
start: value.start,
|
||||
end: Some(value.end),
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "source-code")]
|
||||
pub mod located {
|
||||
include!("gen/located.rs");
|
||||
}
|
||||
|
||||
pub use rustpython_parser_core::source_code;
|
||||
|
|
|
@ -1,11 +1,5 @@
|
|||
use crate::attributed::Attributed;
|
||||
use crate::fold_helpers::Foldable;
|
||||
use crate::location::SourceRange;
|
||||
use rustpython_compiler_core::SourceLocator;
|
||||
|
||||
pub fn locate<X: Foldable<(), SourceRange>>(locator: &mut SourceLocator, ast: X) -> X::Mapped {
|
||||
ast.fold(locator).unwrap()
|
||||
}
|
||||
use rustpython_parser_core::source_code::{SourceLocator, SourceRange};
|
||||
|
||||
impl crate::fold::Fold<()> for SourceLocator<'_> {
|
||||
type TargetU = SourceRange;
|
|
@ -1,7 +1,5 @@
|
|||
use crate::{
|
||||
Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Expr, ExprKind,
|
||||
Operator,
|
||||
};
|
||||
use crate::ConversionFlag;
|
||||
use crate::{Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Expr, ExprKind, Operator};
|
||||
use std::fmt;
|
||||
|
||||
mod precedence {
|
||||
|
@ -452,7 +450,7 @@ impl<'a> Unparser<'a> {
|
|||
fn unparse_formatted<U>(
|
||||
&mut self,
|
||||
val: &Expr<U>,
|
||||
conversion: usize,
|
||||
conversion: u32,
|
||||
spec: Option<&Expr<U>>,
|
||||
) -> fmt::Result {
|
||||
let buffered = to_string_fmt(|f| Unparser::new(f).unparse_expr(val, precedence::TEST + 1));
|
||||
|
@ -466,7 +464,7 @@ impl<'a> Unparser<'a> {
|
|||
self.p(&buffered)?;
|
||||
drop(buffered);
|
||||
|
||||
if conversion != ConversionFlag::None as usize {
|
||||
if conversion != ConversionFlag::None as u32 {
|
||||
self.p("!")?;
|
||||
let buf = &[conversion as u8];
|
||||
let c = std::str::from_utf8(buf).unwrap();
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "rustpython-compiler-core"
|
||||
description = "RustPython specific bytecode."
|
||||
name = "rustpython-parser-core"
|
||||
description = "RustPython parser data types."
|
||||
version = "0.2.0"
|
||||
authors = ["RustPython Team"]
|
||||
edition = "2021"
|
||||
|
@ -8,7 +8,6 @@ repository = "https://github.com/RustPython/RustPython"
|
|||
license = "MIT"
|
||||
|
||||
[dependencies]
|
||||
bitflags = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
num-bigint = { workspace = true }
|
||||
num-complex = { workspace = true }
|
||||
|
@ -18,3 +17,6 @@ ruff_python_ast = { workspace = true }
|
|||
|
||||
lz4_flex = "0.9.2"
|
||||
|
||||
[features]
|
||||
default = ["source-code"]
|
||||
source-code = []
|
||||
|
|
1613
core/src/bytecode.rs
1613
core/src/bytecode.rs
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
|||
use crate::{source_code::SourceLocation, text_size::TextSize};
|
||||
use crate::text_size::TextSize;
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -61,79 +61,4 @@ impl<T> BaseError<T> {
|
|||
{
|
||||
BaseError::from(self)
|
||||
}
|
||||
|
||||
pub fn into_located<U>(self, locator: &mut super::SourceLocator) -> LocatedError<U>
|
||||
where
|
||||
T: Into<U>,
|
||||
{
|
||||
let location = locator.locate(self.offset);
|
||||
LocatedError {
|
||||
error: self.error.into(),
|
||||
location: Some(location),
|
||||
source_path: self.source_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct LocatedError<T> {
|
||||
pub error: T,
|
||||
pub location: Option<SourceLocation>,
|
||||
pub source_path: String,
|
||||
}
|
||||
|
||||
impl<T> LocatedError<T> {
|
||||
pub fn error(self) -> T {
|
||||
self.error
|
||||
}
|
||||
|
||||
pub fn from<U>(obj: LocatedError<U>) -> Self
|
||||
where
|
||||
U: Into<T>,
|
||||
{
|
||||
Self {
|
||||
error: obj.error.into(),
|
||||
location: obj.location,
|
||||
source_path: obj.source_path,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into<U>(self) -> LocatedError<U>
|
||||
where
|
||||
T: Into<U>,
|
||||
{
|
||||
LocatedError::from(self)
|
||||
}
|
||||
|
||||
pub fn python_location(&self) -> (usize, usize) {
|
||||
if let Some(location) = self.location {
|
||||
(
|
||||
location.row.to_one_indexed(),
|
||||
location.column.to_one_indexed(),
|
||||
)
|
||||
} else {
|
||||
(0, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Display for LocatedError<T>
|
||||
where
|
||||
T: std::fmt::Display,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let (row, column) = self.location.map_or((0, 0), |l| {
|
||||
(l.row.to_one_indexed(), l.column.to_one_indexed())
|
||||
});
|
||||
write!(f, "{} at row {} col {}", &self.error, row, column,)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::error::Error for LocatedError<T>
|
||||
where
|
||||
T: std::error::Error + 'static,
|
||||
{
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
Some(&self.error)
|
||||
}
|
||||
}
|
||||
|
|
13
core/src/format.rs
Normal file
13
core/src/format.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
/// Transforms a value prior to formatting it.
///
/// Each variant's discriminant is a single byte (`#[repr(u8)]`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum ConversionFlag {
    /// No conversion.
    ///
    /// CPython uses `-1` for this case; `0` is used here instead.
    None = 0,
    /// Converts by calling `str(<value>)`.
    Str = b's',
    /// Converts by calling `ascii(<value>)`.
    Ascii = b'a',
    /// Converts by calling `repr(<value>)`.
    Repr = b'r',
}
|
|
@ -1,41 +1,15 @@
|
|||
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
|
||||
#![doc(html_root_url = "https://docs.rs/rustpython-compiler-core/")]
|
||||
#![doc(html_root_url = "https://docs.rs/rustpython-parser-core/")]
|
||||
|
||||
// parser core
|
||||
mod error;
|
||||
mod mode;
|
||||
mod format;
|
||||
pub mod mode;
|
||||
#[cfg(feature = "source-code")]
|
||||
pub mod source_code;
|
||||
|
||||
pub use error::BaseError;
|
||||
pub use format::ConversionFlag;
|
||||
pub use mode::Mode;
|
||||
pub use ruff_text_size as text_size; // re-export mandatory and frequently accessed dependency
|
||||
|
||||
// compiler core
|
||||
mod bytecode;
|
||||
pub mod marshal;
|
||||
|
||||
pub use bytecode::*;
|
||||
pub use error::LocatedError;
|
||||
pub use ruff_python_ast::source_code;
|
||||
pub use ruff_python_ast::source_code::OneIndexed as LineNumber;
|
||||
|
||||
use source_code::{LineIndex, SourceCode, SourceLocation};
|
||||
use text_size::TextSize;
|
||||
/// Converts source code byte-offset to Python convention line and column numbers.
|
||||
pub struct SourceLocator<'a> {
|
||||
pub source: &'a str,
|
||||
index: LineIndex,
|
||||
}
|
||||
|
||||
impl<'a> SourceLocator<'a> {
|
||||
#[inline]
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let index = LineIndex::from_source_text(source);
|
||||
Self { source, index }
|
||||
}
|
||||
|
||||
pub fn locate(&mut self, offset: TextSize) -> SourceLocation {
|
||||
let code = SourceCode::new(self.source, &self.index);
|
||||
let offset = unsafe { std::mem::transmute(offset) }; // temp code to fix text_size dependency
|
||||
code.source_location(offset)
|
||||
}
|
||||
}
|
||||
// re-export our public interface
|
||||
pub use ruff_text_size as text_size;
|
||||
|
|
|
@ -1,635 +0,0 @@
|
|||
use core::fmt;
|
||||
use std::convert::Infallible;
|
||||
|
||||
use num_bigint::{BigInt, Sign};
|
||||
use num_complex::Complex64;
|
||||
|
||||
use crate::{
|
||||
bytecode::*,
|
||||
source_code::{OneIndexed, SourceLocation},
|
||||
};
|
||||
|
||||
pub const FORMAT_VERSION: u32 = 4;
|
||||
|
||||
/// Failure modes reported by the marshal reading routines in this module.
#[derive(Debug)]
pub enum MarshalError {
    /// Unexpected End Of File
    Eof,
    /// Invalid Bytecode
    InvalidBytecode,
    /// Invalid utf8 in string
    InvalidUtf8,
    /// Invalid source location
    InvalidLocation,
    /// Bad type marker
    BadType,
}

impl fmt::Display for MarshalError {
    /// Writes a short, fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            MarshalError::Eof => "unexpected end of data",
            MarshalError::InvalidBytecode => "invalid bytecode",
            MarshalError::InvalidUtf8 => "invalid utf8",
            MarshalError::InvalidLocation => "invalid source location",
            MarshalError::BadType => "bad type marker",
        };
        f.write_str(message)
    }
}

impl From<std::str::Utf8Error> for MarshalError {
    /// Any UTF-8 decoding failure maps to [`MarshalError::InvalidUtf8`].
    fn from(_: std::str::Utf8Error) -> Self {
        MarshalError::InvalidUtf8
    }
}

impl std::error::Error for MarshalError {}

/// Module-local result alias whose error type defaults to [`MarshalError`].
type Result<T, E = MarshalError> = std::result::Result<T, E>;
|
||||
|
||||
/// One-byte markers identifying the kind of value that follows in the
/// marshaled stream. Commented-out entries are markers that are not handled
/// by this module.
#[repr(u8)]
enum Type {
    // Null = b'0',
    None = b'N',
    False = b'F',
    True = b'T',
    StopIter = b'S',
    Ellipsis = b'.',
    Int = b'i',
    Float = b'g',
    Complex = b'y',
    // Long = b'l', // i32
    Bytes = b's', // = TYPE_STRING
    // Interned = b't',
    // Ref = b'r',
    Tuple = b'(',
    List = b'[',
    Dict = b'{',
    Code = b'c',
    Unicode = b'u',
    // Unknown = b'?',
    Set = b'<',
    FrozenSet = b'>',
    Ascii = b'a',
    // AsciiInterned = b'A',
    // SmallTuple = b')',
    // ShortAscii = b'z',
    // ShortAsciiInterned = b'Z',
}
|
||||
// const FLAG_REF: u8 = b'\x80';
|
||||
|
||||
impl TryFrom<u8> for Type {
|
||||
type Error = MarshalError;
|
||||
fn try_from(value: u8) -> Result<Self> {
|
||||
use Type::*;
|
||||
Ok(match value {
|
||||
// b'0' => Null,
|
||||
b'N' => None,
|
||||
b'F' => False,
|
||||
b'T' => True,
|
||||
b'S' => StopIter,
|
||||
b'.' => Ellipsis,
|
||||
b'i' => Int,
|
||||
b'g' => Float,
|
||||
b'y' => Complex,
|
||||
// b'l' => Long,
|
||||
b's' => Bytes,
|
||||
// b't' => Interned,
|
||||
// b'r' => Ref,
|
||||
b'(' => Tuple,
|
||||
b'[' => List,
|
||||
b'{' => Dict,
|
||||
b'c' => Code,
|
||||
b'u' => Unicode,
|
||||
// b'?' => Unknown,
|
||||
b'<' => Set,
|
||||
b'>' => FrozenSet,
|
||||
b'a' => Ascii,
|
||||
// b'A' => AsciiInterned,
|
||||
// b')' => SmallTuple,
|
||||
// b'z' => ShortAscii,
|
||||
// b'Z' => ShortAsciiInterned,
|
||||
_ => return Err(MarshalError::BadType),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Read {
|
||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]>;
|
||||
fn read_array<const N: usize>(&mut self) -> Result<&[u8; N]> {
|
||||
self.read_slice(N as u32).map(|s| s.try_into().unwrap())
|
||||
}
|
||||
fn read_str(&mut self, len: u32) -> Result<&str> {
|
||||
Ok(std::str::from_utf8(self.read_slice(len)?)?)
|
||||
}
|
||||
fn read_u8(&mut self) -> Result<u8> {
|
||||
Ok(u8::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
fn read_u16(&mut self) -> Result<u16> {
|
||||
Ok(u16::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
fn read_u32(&mut self) -> Result<u32> {
|
||||
Ok(u32::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
fn read_u64(&mut self) -> Result<u64> {
|
||||
Ok(u64::from_le_bytes(*self.read_array()?))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait ReadBorrowed<'a>: Read {
|
||||
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
|
||||
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
|
||||
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
|
||||
}
|
||||
}
|
||||
|
||||
impl Read for &[u8] {
|
||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
|
||||
self.read_slice_borrow(n)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ReadBorrowed<'a> for &'a [u8] {
|
||||
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
|
||||
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
|
||||
*self = &self[n as usize..];
|
||||
Ok(data)
|
||||
}
|
||||
}
|
||||
|
||||
/// A buffer together with the index of the next byte to read.
pub struct Cursor<B> {
    /// The underlying buffer.
    pub data: B,
    /// Index into `data` at which the next read starts.
    pub position: usize,
}
|
||||
|
||||
impl<B: AsRef<[u8]>> Read for Cursor<B> {
|
||||
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
|
||||
let data = &self.data.as_ref()[self.position..];
|
||||
let slice = data.get(..n as usize).ok_or(MarshalError::Eof)?;
|
||||
self.position += n as usize;
|
||||
Ok(slice)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_code<R: Read, Bag: ConstantBag>(
|
||||
rdr: &mut R,
|
||||
bag: Bag,
|
||||
) -> Result<CodeObject<Bag::Constant>> {
|
||||
let len = rdr.read_u32()?;
|
||||
let instructions = rdr.read_slice(len * 2)?;
|
||||
let instructions = instructions
|
||||
.chunks_exact(2)
|
||||
.map(|cu| {
|
||||
let op = Instruction::try_from(cu[0])?;
|
||||
let arg = OpArgByte(cu[1]);
|
||||
Ok(CodeUnit { op, arg })
|
||||
})
|
||||
.collect::<Result<Box<[CodeUnit]>>>()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let locations = (0..len)
|
||||
.map(|_| {
|
||||
Ok(SourceLocation {
|
||||
row: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
|
||||
column: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Box<[SourceLocation]>>>()?;
|
||||
|
||||
let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?);
|
||||
|
||||
let posonlyarg_count = rdr.read_u32()?;
|
||||
let arg_count = rdr.read_u32()?;
|
||||
let kwonlyarg_count = rdr.read_u32()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let source_path = bag.make_name(rdr.read_str(len)?);
|
||||
|
||||
let first_line_number =
|
||||
OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?;
|
||||
let max_stackdepth = rdr.read_u32()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let obj_name = bag.make_name(rdr.read_str(len)?);
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let cell2arg = (len != 0)
|
||||
.then(|| {
|
||||
(0..len)
|
||||
.map(|_| Ok(rdr.read_u32()? as i32))
|
||||
.collect::<Result<Box<[i32]>>>()
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
let len = rdr.read_u32()?;
|
||||
let constants = (0..len)
|
||||
.map(|_| deserialize_value(rdr, bag))
|
||||
.collect::<Result<Box<[_]>>>()?;
|
||||
|
||||
let mut read_names = || {
|
||||
let len = rdr.read_u32()?;
|
||||
(0..len)
|
||||
.map(|_| {
|
||||
let len = rdr.read_u32()?;
|
||||
Ok(bag.make_name(rdr.read_str(len)?))
|
||||
})
|
||||
.collect::<Result<Box<[_]>>>()
|
||||
};
|
||||
|
||||
let names = read_names()?;
|
||||
let varnames = read_names()?;
|
||||
let cellvars = read_names()?;
|
||||
let freevars = read_names()?;
|
||||
|
||||
Ok(CodeObject {
|
||||
instructions,
|
||||
locations,
|
||||
flags,
|
||||
posonlyarg_count,
|
||||
arg_count,
|
||||
kwonlyarg_count,
|
||||
source_path,
|
||||
first_line_number,
|
||||
max_stackdepth,
|
||||
obj_name,
|
||||
cell2arg,
|
||||
constants,
|
||||
names,
|
||||
varnames,
|
||||
cellvars,
|
||||
freevars,
|
||||
})
|
||||
}
|
||||
|
||||
pub trait MarshalBag: Copy {
|
||||
type Value;
|
||||
fn make_bool(&self, value: bool) -> Self::Value;
|
||||
fn make_none(&self) -> Self::Value;
|
||||
fn make_ellipsis(&self) -> Self::Value;
|
||||
fn make_float(&self, value: f64) -> Self::Value;
|
||||
fn make_complex(&self, value: Complex64) -> Self::Value;
|
||||
fn make_str(&self, value: &str) -> Self::Value;
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value;
|
||||
fn make_int(&self, value: BigInt) -> Self::Value;
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value;
|
||||
fn make_code(
|
||||
&self,
|
||||
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
|
||||
) -> Self::Value;
|
||||
fn make_stop_iter(&self) -> Result<Self::Value>;
|
||||
fn make_list(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
|
||||
fn make_set(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
|
||||
fn make_frozenset(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
|
||||
fn make_dict(
|
||||
&self,
|
||||
it: impl Iterator<Item = (Self::Value, Self::Value)>,
|
||||
) -> Result<Self::Value>;
|
||||
type ConstantBag: ConstantBag;
|
||||
fn constant_bag(self) -> Self::ConstantBag;
|
||||
}
|
||||
|
||||
impl<Bag: ConstantBag> MarshalBag for Bag {
|
||||
type Value = Bag::Constant;
|
||||
fn make_bool(&self, value: bool) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Boolean { value })
|
||||
}
|
||||
fn make_none(&self) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::None)
|
||||
}
|
||||
fn make_ellipsis(&self) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Ellipsis)
|
||||
}
|
||||
fn make_float(&self, value: f64) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Float { value })
|
||||
}
|
||||
fn make_complex(&self, value: Complex64) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Complex { value })
|
||||
}
|
||||
fn make_str(&self, value: &str) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Str { value })
|
||||
}
|
||||
fn make_bytes(&self, value: &[u8]) -> Self::Value {
|
||||
self.make_constant::<Bag::Constant>(BorrowedConstant::Bytes { value })
|
||||
}
|
||||
fn make_int(&self, value: BigInt) -> Self::Value {
|
||||
self.make_int(value)
|
||||
}
|
||||
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value {
|
||||
self.make_tuple(elements)
|
||||
}
|
||||
fn make_code(
|
||||
&self,
|
||||
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
|
||||
) -> Self::Value {
|
||||
self.make_code(code)
|
||||
}
|
||||
fn make_stop_iter(&self) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_list(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_set(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_frozenset(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
fn make_dict(
|
||||
&self,
|
||||
_: impl Iterator<Item = (Self::Value, Self::Value)>,
|
||||
) -> Result<Self::Value> {
|
||||
Err(MarshalError::BadType)
|
||||
}
|
||||
type ConstantBag = Self;
|
||||
fn constant_bag(self) -> Self::ConstantBag {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_value<R: Read, Bag: MarshalBag>(rdr: &mut R, bag: Bag) -> Result<Bag::Value> {
|
||||
let typ = Type::try_from(rdr.read_u8()?)?;
|
||||
let value = match typ {
|
||||
Type::True => bag.make_bool(true),
|
||||
Type::False => bag.make_bool(false),
|
||||
Type::None => bag.make_none(),
|
||||
Type::StopIter => bag.make_stop_iter()?,
|
||||
Type::Ellipsis => bag.make_ellipsis(),
|
||||
Type::Int => {
|
||||
let len = rdr.read_u32()? as i32;
|
||||
let sign = if len < 0 { Sign::Minus } else { Sign::Plus };
|
||||
let bytes = rdr.read_slice(len.unsigned_abs())?;
|
||||
let int = BigInt::from_bytes_le(sign, bytes);
|
||||
bag.make_int(int)
|
||||
}
|
||||
Type::Float => {
|
||||
let value = f64::from_bits(rdr.read_u64()?);
|
||||
bag.make_float(value)
|
||||
}
|
||||
Type::Complex => {
|
||||
let re = f64::from_bits(rdr.read_u64()?);
|
||||
let im = f64::from_bits(rdr.read_u64()?);
|
||||
let value = Complex64 { re, im };
|
||||
bag.make_complex(value)
|
||||
}
|
||||
Type::Ascii | Type::Unicode => {
|
||||
let len = rdr.read_u32()?;
|
||||
let value = rdr.read_str(len)?;
|
||||
bag.make_str(value)
|
||||
}
|
||||
Type::Tuple => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_tuple(it))?
|
||||
}
|
||||
Type::List => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_list(it))??
|
||||
}
|
||||
Type::Set => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_set(it))??
|
||||
}
|
||||
Type::FrozenSet => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| deserialize_value(rdr, bag));
|
||||
itertools::process_results(it, |it| bag.make_frozenset(it))??
|
||||
}
|
||||
Type::Dict => {
|
||||
let len = rdr.read_u32()?;
|
||||
let it = (0..len).map(|_| {
|
||||
let k = deserialize_value(rdr, bag)?;
|
||||
let v = deserialize_value(rdr, bag)?;
|
||||
Ok::<_, MarshalError>((k, v))
|
||||
});
|
||||
itertools::process_results(it, |it| bag.make_dict(it))??
|
||||
}
|
||||
Type::Bytes => {
|
||||
// Following CPython, after marshaling, byte arrays are converted into bytes.
|
||||
let len = rdr.read_u32()?;
|
||||
let value = rdr.read_slice(len)?;
|
||||
bag.make_bytes(value)
|
||||
}
|
||||
Type::Code => bag.make_code(deserialize_code(rdr, bag.constant_bag())?),
|
||||
};
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// A value that can present itself as a [`DumpableValue`] for marshaling.
///
/// `serialize_value` calls [`Dumpable::with_dump`] on each element of a
/// container variant to obtain the borrowed view it writes out.
pub trait Dumpable: Sized {
|
||||
/// Error type that `with_dump` may return.
type Error;
|
||||
/// Constant type of the code object carried by `DumpableValue::Code`.
type Constant: Constant;
|
||||
/// Passes a borrowed [`DumpableValue`] view of `self` to `f` and returns
/// the closure's result wrapped in `Ok`, or `Self::Error` on failure.
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error>;
|
||||
}
|
||||
|
||||
/// Borrowed view of a single value, as consumed by `serialize_value`.
///
/// Container variants borrow slices of `D`; their elements are expanded
/// through `Dumpable::with_dump` while serializing.
pub enum DumpableValue<'a, D: Dumpable> {
|
||||
/// Integer, borrowed as a `BigInt`.
Integer(&'a BigInt),
|
||||
/// 64-bit float; serialized via its bit pattern.
Float(f64),
|
||||
/// Complex number; real and imaginary parts are serialized in that order.
Complex(Complex64),
|
||||
/// Boolean; serialized as a bare `Type::True` / `Type::False` tag.
Boolean(bool),
|
||||
/// String; serialized as length-prefixed bytes of the `str`.
Str(&'a str),
|
||||
/// Byte string; serialized as length-prefixed bytes.
Bytes(&'a [u8]),
|
||||
/// Code object; serialized with `serialize_code`.
Code(&'a CodeObject<D::Constant>),
|
||||
/// Tuple of dumpable elements.
Tuple(&'a [D]),
|
||||
/// Serialized as a bare `Type::None` tag.
None,
|
||||
/// Serialized as a bare `Type::Ellipsis` tag.
Ellipsis,
|
||||
/// Serialized as a bare `Type::StopIter` tag.
StopIter,
|
||||
/// List of dumpable elements.
List(&'a [D]),
|
||||
/// Set, given as a slice of its elements.
Set(&'a [D]),
|
||||
/// Frozen set, given as a slice of its elements.
Frozenset(&'a [D]),
|
||||
/// Dictionary, given as a slice of `(key, value)` pairs.
Dict(&'a [(D, D)]),
|
||||
}
|
||||
|
||||
impl<'a, C: Constant> From<BorrowedConstant<'a, C>> for DumpableValue<'a, C> {
|
||||
fn from(c: BorrowedConstant<'a, C>) -> Self {
|
||||
match c {
|
||||
BorrowedConstant::Integer { value } => Self::Integer(value),
|
||||
BorrowedConstant::Float { value } => Self::Float(value),
|
||||
BorrowedConstant::Complex { value } => Self::Complex(value),
|
||||
BorrowedConstant::Boolean { value } => Self::Boolean(value),
|
||||
BorrowedConstant::Str { value } => Self::Str(value),
|
||||
BorrowedConstant::Bytes { value } => Self::Bytes(value),
|
||||
BorrowedConstant::Code { code } => Self::Code(code),
|
||||
BorrowedConstant::Tuple { elements } => Self::Tuple(elements),
|
||||
BorrowedConstant::None => Self::None,
|
||||
BorrowedConstant::Ellipsis => Self::Ellipsis,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Constant> Dumpable for C {
|
||||
type Error = Infallible;
|
||||
type Constant = Self;
|
||||
#[inline(always)]
|
||||
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error> {
|
||||
Ok(f(self.borrow_constant().into()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Byte sink used by the marshal serializer.
///
/// Implementors only supply [`Write::write_slice`]; the integer helpers are
/// built on top of it and always emit little-endian bytes.
pub trait Write {
    /// Appends `slice` to the output.
    fn write_slice(&mut self, slice: &[u8]);

    /// Writes a single byte.
    fn write_u8(&mut self, v: u8) {
        self.write_slice(&[v])
    }

    /// Writes `v` as two little-endian bytes.
    fn write_u16(&mut self, v: u16) {
        let encoded = u16::to_le_bytes(v);
        self.write_slice(&encoded)
    }

    /// Writes `v` as four little-endian bytes.
    fn write_u32(&mut self, v: u32) {
        let encoded = u32::to_le_bytes(v);
        self.write_slice(&encoded)
    }

    /// Writes `v` as eight little-endian bytes.
    fn write_u64(&mut self, v: u64) {
        let encoded = u64::to_le_bytes(v);
        self.write_slice(&encoded)
    }
}
|
||||
|
||||
impl Write for Vec<u8> {
|
||||
fn write_slice(&mut self, slice: &[u8]) {
|
||||
self.extend_from_slice(slice)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
|
||||
let Ok(len) = len.try_into() else { panic!("too long to serialize") };
|
||||
buf.write_u32(len);
|
||||
}
|
||||
|
||||
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
|
||||
write_len(buf, slice.len());
|
||||
buf.write_slice(slice);
|
||||
}
|
||||
|
||||
pub fn serialize_value<W: Write, D: Dumpable>(
|
||||
buf: &mut W,
|
||||
constant: DumpableValue<'_, D>,
|
||||
) -> Result<(), D::Error> {
|
||||
match constant {
|
||||
DumpableValue::Integer(int) => {
|
||||
buf.write_u8(Type::Int as u8);
|
||||
let (sign, bytes) = int.to_bytes_le();
|
||||
let len: i32 = bytes.len().try_into().expect("too long to serialize");
|
||||
let len = if sign == Sign::Minus { -len } else { len };
|
||||
buf.write_u32(len as u32);
|
||||
buf.write_slice(&bytes);
|
||||
}
|
||||
DumpableValue::Float(f) => {
|
||||
buf.write_u8(Type::Float as u8);
|
||||
buf.write_u64(f.to_bits());
|
||||
}
|
||||
DumpableValue::Complex(c) => {
|
||||
buf.write_u8(Type::Complex as u8);
|
||||
buf.write_u64(c.re.to_bits());
|
||||
buf.write_u64(c.im.to_bits());
|
||||
}
|
||||
DumpableValue::Boolean(b) => {
|
||||
buf.write_u8(if b { Type::True } else { Type::False } as u8);
|
||||
}
|
||||
DumpableValue::Str(s) => {
|
||||
buf.write_u8(Type::Unicode as u8);
|
||||
write_vec(buf, s.as_bytes());
|
||||
}
|
||||
DumpableValue::Bytes(b) => {
|
||||
buf.write_u8(Type::Bytes as u8);
|
||||
write_vec(buf, b);
|
||||
}
|
||||
DumpableValue::Code(c) => {
|
||||
buf.write_u8(Type::Code as u8);
|
||||
serialize_code(buf, c);
|
||||
}
|
||||
DumpableValue::Tuple(tup) => {
|
||||
buf.write_u8(Type::Tuple as u8);
|
||||
write_len(buf, tup.len());
|
||||
for val in tup {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::None => {
|
||||
buf.write_u8(Type::None as u8);
|
||||
}
|
||||
DumpableValue::Ellipsis => {
|
||||
buf.write_u8(Type::Ellipsis as u8);
|
||||
}
|
||||
DumpableValue::StopIter => {
|
||||
buf.write_u8(Type::StopIter as u8);
|
||||
}
|
||||
DumpableValue::List(l) => {
|
||||
buf.write_u8(Type::List as u8);
|
||||
write_len(buf, l.len());
|
||||
for val in l {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::Set(set) => {
|
||||
buf.write_u8(Type::Set as u8);
|
||||
write_len(buf, set.len());
|
||||
for val in set {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::Frozenset(set) => {
|
||||
buf.write_u8(Type::FrozenSet as u8);
|
||||
write_len(buf, set.len());
|
||||
for val in set {
|
||||
val.with_dump(|val| serialize_value(buf, val))??
|
||||
}
|
||||
}
|
||||
DumpableValue::Dict(d) => {
|
||||
buf.write_u8(Type::Dict as u8);
|
||||
write_len(buf, d.len());
|
||||
for (k, v) in d {
|
||||
k.with_dump(|val| serialize_value(buf, val))??;
|
||||
v.with_dump(|val| serialize_value(buf, val))??;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
|
||||
write_len(buf, code.instructions.len());
|
||||
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
|
||||
let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
|
||||
buf.write_slice(instructions_bytes);
|
||||
|
||||
write_len(buf, code.locations.len());
|
||||
for loc in &*code.locations {
|
||||
buf.write_u32(loc.row.get() as _);
|
||||
buf.write_u32(loc.column.get() as _);
|
||||
}
|
||||
|
||||
buf.write_u16(code.flags.bits());
|
||||
|
||||
buf.write_u32(code.posonlyarg_count);
|
||||
buf.write_u32(code.arg_count);
|
||||
buf.write_u32(code.kwonlyarg_count);
|
||||
|
||||
write_vec(buf, code.source_path.as_ref().as_bytes());
|
||||
|
||||
buf.write_u32(code.first_line_number.get());
|
||||
buf.write_u32(code.max_stackdepth);
|
||||
|
||||
write_vec(buf, code.obj_name.as_ref().as_bytes());
|
||||
|
||||
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
|
||||
write_len(buf, cell2arg.len());
|
||||
for &i in cell2arg {
|
||||
buf.write_u32(i as u32)
|
||||
}
|
||||
|
||||
write_len(buf, code.constants.len());
|
||||
for constant in &*code.constants {
|
||||
serialize_value(buf, constant.borrow_constant().into()).unwrap_or_else(|x| match x {})
|
||||
}
|
||||
|
||||
let mut write_names = |names: &[C::Name]| {
|
||||
write_len(buf, names.len());
|
||||
for name in names {
|
||||
write_vec(buf, name.as_ref().as_bytes());
|
||||
}
|
||||
};
|
||||
|
||||
write_names(&code.names);
|
||||
write_names(&code.varnames);
|
||||
write_names(&code.cellvars);
|
||||
write_names(&code.freevars);
|
||||
}
|
|
@ -1,27 +1,30 @@
|
|||
//! Control in the different modes by which a source file can be parsed.
|
||||
|
||||
/// The mode argument specifies in what way code must be parsed.
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum Mode {
|
||||
Exec,
|
||||
Eval,
|
||||
Single,
|
||||
BlockExpr,
|
||||
/// The code consists of a sequence of statements.
|
||||
Module,
|
||||
/// The code consists of a sequence of interactive statements.
|
||||
Interactive,
|
||||
/// The code consists of a single expression.
|
||||
Expression,
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Mode {
|
||||
type Err = ModeParseError;
|
||||
|
||||
// To support `builtins.compile()` `mode` argument
|
||||
fn from_str(s: &str) -> Result<Self, ModeParseError> {
|
||||
match s {
|
||||
"exec" => Ok(Mode::Exec),
|
||||
"eval" => Ok(Mode::Eval),
|
||||
"single" => Ok(Mode::Single),
|
||||
_ => Err(ModeParseError(())),
|
||||
"exec" | "single" => Ok(Mode::Module),
|
||||
"eval" => Ok(Mode::Expression),
|
||||
_ => Err(ModeParseError),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returned when a given mode is not valid.
|
||||
#[derive(Debug)]
|
||||
pub struct ModeParseError(());
|
||||
pub struct ModeParseError;
|
||||
|
||||
impl std::fmt::Display for ModeParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
|
|
126
core/src/source_code.rs
Normal file
126
core/src/source_code.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
// re-export our public interface
|
||||
pub use ruff_python_ast::source_code::*;
|
||||
|
||||
pub type LineNumber = ruff_python_ast::source_code::OneIndexed;
|
||||
|
||||
/// A span of source locations with a mandatory start and an optional end.
#[derive(Debug)]
|
||||
pub struct SourceRange {
|
||||
/// Location where the range begins.
pub start: SourceLocation,
|
||||
/// Location where the range ends, when one is recorded.
pub end: Option<SourceLocation>,
|
||||
}
|
||||
|
||||
impl SourceRange {
|
||||
pub fn new(start: SourceLocation, end: SourceLocation) -> Self {
|
||||
Self {
|
||||
start,
|
||||
end: Some(end),
|
||||
}
|
||||
}
|
||||
pub fn unwrap_end(&self) -> SourceLocation {
|
||||
self.end.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// A half-open `start..end` range converts into a `SourceRange` whose end is
/// always present.
impl From<std::ops::Range<SourceLocation>> for SourceRange {
    fn from(value: std::ops::Range<SourceLocation>) -> Self {
        let std::ops::Range { start, end } = value;
        Self {
            start,
            end: Some(end),
        }
    }
}
|
||||
|
||||
/// Converts source code byte-offset to Python convention line and column numbers.
|
||||
pub struct SourceLocator<'a> {
|
||||
/// Source text that offsets passed to `locate` refer to.
pub source: &'a str,
|
||||
/// Line index built from `source` by `LineIndex::from_source_text`.
index: LineIndex,
|
||||
}
|
||||
|
||||
impl<'a> SourceLocator<'a> {
|
||||
#[inline]
|
||||
pub fn new(source: &'a str) -> Self {
|
||||
let index = LineIndex::from_source_text(source);
|
||||
Self { source, index }
|
||||
}
|
||||
|
||||
pub fn to_source_code(&self) -> SourceCode {
|
||||
SourceCode::new(self.source, &self.index)
|
||||
}
|
||||
|
||||
pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
|
||||
let offset = offset.to_u32().into();
|
||||
self.to_source_code().source_location(offset)
|
||||
}
|
||||
|
||||
pub fn locate_error<T, U>(&mut self, base: crate::error::BaseError<T>) -> LocatedError<U>
|
||||
where
|
||||
T: Into<U>,
|
||||
{
|
||||
let location = self.locate(base.offset);
|
||||
LocatedError {
|
||||
error: base.error.into(),
|
||||
location: Some(location),
|
||||
source_path: base.source_path,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An error paired with an optional source location and the path of the
/// source it came from.
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct LocatedError<T> {
|
||||
/// The underlying error value.
pub error: T,
|
||||
/// Where the error occurred, if known; `None` is reported as `(0, 0)`
/// by `python_location` and `Display`.
pub location: Option<SourceLocation>,
|
||||
/// Path of the source the error belongs to.
pub source_path: String,
|
||||
}
|
||||
|
||||
impl<T> LocatedError<T> {
|
||||
pub fn error(self) -> T {
|
||||
self.error
|
||||
}
|
||||
|
||||
pub fn from<U>(obj: LocatedError<U>) -> Self
|
||||
where
|
||||
U: Into<T>,
|
||||
{
|
||||
Self {
|
||||
error: obj.error.into(),
|
||||
location: obj.location,
|
||||
source_path: obj.source_path,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into<U>(self) -> LocatedError<U>
|
||||
where
|
||||
T: Into<U>,
|
||||
{
|
||||
LocatedError::from(self)
|
||||
}
|
||||
|
||||
pub fn python_location(&self) -> (usize, usize) {
|
||||
if let Some(location) = self.location {
|
||||
(location.row.to_usize(), location.column.to_usize())
|
||||
} else {
|
||||
(0, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::fmt::Display for LocatedError<T>
|
||||
where
|
||||
T: std::fmt::Display,
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let (row, column) = self
|
||||
.location
|
||||
.map_or((0, 0), |l| (l.row.to_usize(), l.column.to_usize()));
|
||||
write!(f, "{} at row {} col {}", &self.error, row, column,)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::error::Error for LocatedError<T>
|
||||
where
|
||||
T: std::error::Error + 'static,
|
||||
{
|
||||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||
Some(&self.error)
|
||||
}
|
||||
}
|
|
@ -10,7 +10,7 @@ edition = "2021"
|
|||
|
||||
[features]
|
||||
default = []
|
||||
serde = ["dep:serde", "rustpython-compiler-core/serde"]
|
||||
serde = ["dep:serde", "rustpython-parser-core/serde"]
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { workspace = true }
|
||||
|
@ -19,10 +19,9 @@ phf_codegen = "0.11.1"
|
|||
tiny-keccak = { version = "2", features = ["sha3"] }
|
||||
|
||||
[dependencies]
|
||||
rustpython-ast = { path = "../ast", version = "0.2.0" }
|
||||
rustpython-compiler-core = { path = "../core", version = "0.2.0" }
|
||||
rustpython-ast = { workspace = true }
|
||||
rustpython-parser-core = { workspace = true }
|
||||
|
||||
ahash = { workspace = true }
|
||||
itertools = { workspace = true }
|
||||
log = { workspace = true }
|
||||
num-bigint = { workspace = true }
|
||||
|
|
|
@ -28,11 +28,11 @@
|
|||
//!
|
||||
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
|
||||
use crate::{
|
||||
mode::Mode,
|
||||
soft_keywords::SoftKeywordTransformer,
|
||||
string::FStringErrorType,
|
||||
text_size::{TextLen, TextRange, TextSize},
|
||||
token::{StringKind, Tok},
|
||||
Mode,
|
||||
};
|
||||
use log::trace;
|
||||
use num_bigint::BigInt;
|
||||
|
|
|
@ -113,20 +113,17 @@
|
|||
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
|
||||
|
||||
pub use rustpython_ast as ast;
|
||||
pub use rustpython_compiler_core::text_size;
|
||||
pub use rustpython_compiler_core::ConversionFlag;
|
||||
pub use rustpython_parser_core::{source_code, text_size, Mode};
|
||||
|
||||
mod function;
|
||||
// Skip flattening lexer to distinguish from full parser
|
||||
mod context;
|
||||
pub mod lexer;
|
||||
mod mode;
|
||||
mod parser;
|
||||
mod soft_keywords;
|
||||
mod string;
|
||||
mod token;
|
||||
|
||||
pub use mode::Mode;
|
||||
pub use parser::{
|
||||
parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
|
||||
ParseError, ParseErrorType,
|
||||
|
|
|
@ -1,55 +0,0 @@
|
|||
//! Control in the different modes by which a source file can be parsed.
|
||||
use crate::token::Tok;
|
||||
|
||||
/// The mode argument specifies in what way code must be parsed.
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum Mode {
|
||||
/// The code consists of a sequence of statements.
|
||||
Module,
|
||||
/// The code consists of a sequence of interactive statement.
|
||||
Interactive,
|
||||
/// The code consists of a single expression.
|
||||
Expression,
|
||||
}
|
||||
|
||||
impl Mode {
|
||||
pub(crate) fn to_marker(self) -> Tok {
|
||||
match self {
|
||||
Self::Module => Tok::StartModule,
|
||||
Self::Interactive => Tok::StartInteractive,
|
||||
Self::Expression => Tok::StartExpression,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rustpython_compiler_core::Mode> for Mode {
|
||||
fn from(mode: rustpython_compiler_core::Mode) -> Self {
|
||||
use rustpython_compiler_core::Mode as CompileMode;
|
||||
match mode {
|
||||
CompileMode::Exec => Self::Module,
|
||||
CompileMode::Eval => Self::Expression,
|
||||
CompileMode::Single | CompileMode::BlockExpr => Self::Interactive,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Mode {
|
||||
type Err = ModeParseError;
|
||||
fn from_str(s: &str) -> Result<Self, ModeParseError> {
|
||||
match s {
|
||||
"exec" | "single" => Ok(Mode::Module),
|
||||
"eval" => Ok(Mode::Expression),
|
||||
_ => Err(ModeParseError(())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returned when a given mode is not valid.
|
||||
#[derive(Debug)]
|
||||
pub struct ModeParseError(());
|
||||
|
||||
impl std::fmt::Display for ModeParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, r#"mode must be "exec", "eval", or "single""#)
|
||||
}
|
||||
}
|
|
@ -15,10 +15,10 @@
|
|||
use crate::{
|
||||
ast,
|
||||
lexer::{self, LexResult, LexicalError, LexicalErrorType},
|
||||
mode::Mode,
|
||||
python,
|
||||
text_size::TextSize,
|
||||
token::Tok,
|
||||
Mode,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use std::iter;
|
||||
|
@ -187,7 +187,7 @@ pub fn parse_tokens(
|
|||
mode: Mode,
|
||||
source_path: &str,
|
||||
) -> Result<ast::Mod, ParseError> {
|
||||
let marker_token = (mode.to_marker(), Default::default());
|
||||
let marker_token = (Tok::start_marker(mode), Default::default());
|
||||
let lexer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
|
@ -202,7 +202,7 @@ pub fn parse_tokens(
|
|||
|
||||
/// Represents errors that occur during parsing and are
|
||||
/// returned by the `parse_*` functions.
|
||||
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
|
||||
pub type ParseError = rustpython_parser_core::BaseError<ParseErrorType>;
|
||||
|
||||
/// Represents the different types of errors that can occur during parsing.
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
|
|
@ -10,6 +10,7 @@ use crate::{
|
|||
context::set_context,
|
||||
string::parse_strings,
|
||||
token::{self, StringKind},
|
||||
text_size::TextSize,
|
||||
};
|
||||
use num_bigint::BigInt;
|
||||
|
||||
|
@ -254,7 +255,7 @@ ImportStatement: ast::Stmt = {
|
|||
},
|
||||
};
|
||||
|
||||
ImportFromLocation: (Option<usize>, Option<String>) = {
|
||||
ImportFromLocation: (Option<u32>, Option<String>) = {
|
||||
<dots: ImportDots*> <name:DottedName> => {
|
||||
(Some(dots.iter().sum()), Some(name))
|
||||
},
|
||||
|
@ -263,7 +264,7 @@ ImportFromLocation: (Option<usize>, Option<String>) = {
|
|||
},
|
||||
};
|
||||
|
||||
ImportDots: usize = {
|
||||
ImportDots: u32 = {
|
||||
"..." => 3,
|
||||
"." => 1,
|
||||
};
|
||||
|
@ -1721,7 +1722,7 @@ ArgumentList: ArgumentList = {
|
|||
}
|
||||
};
|
||||
|
||||
FunctionArgument: (Option<(crate::text_size::TextSize, crate::text_size::TextSize, Option<String>)>, ast::Expr) = {
|
||||
FunctionArgument: (Option<(TextSize, TextSize, Option<String>)>, ast::Expr) = {
|
||||
<location:@L> <e:NamedExpressionTest> <c:CompFor?> <end_location:@R> => {
|
||||
let expr = match c {
|
||||
Some(c) => ast::Expr::new(
|
||||
|
@ -1775,7 +1776,7 @@ Identifier: String = <s:name> => s;
|
|||
|
||||
// Hook external lexer:
|
||||
extern {
|
||||
type Location = crate::text_size::TextSize;
|
||||
type Location = TextSize;
|
||||
type Error = LexicalError;
|
||||
|
||||
enum token::Tok {
|
||||
|
|
18926
parser/src/python.rs
generated
18926
parser/src/python.rs
generated
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
|||
use crate::{lexer::LexResult, mode::Mode, token::Tok};
|
||||
use crate::{lexer::LexResult, token::Tok, Mode};
|
||||
use itertools::{Itertools, MultiPeek};
|
||||
|
||||
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
|
||||
|
|
|
@ -4,13 +4,16 @@
|
|||
// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
|
||||
// we have to do the parsing here, manually.
|
||||
use crate::{
|
||||
ast::{self, Constant, ConversionFlag, Expr, ExprKind},
|
||||
ast::{self, Constant, Expr, ExprKind},
|
||||
lexer::{LexicalError, LexicalErrorType},
|
||||
parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
|
||||
token::{StringKind, Tok},
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use rustpython_compiler_core::text_size::{TextLen, TextSize};
|
||||
use rustpython_parser_core::{
|
||||
text_size::{TextLen, TextSize},
|
||||
ConversionFlag,
|
||||
};
|
||||
|
||||
// unicode_names2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
|
||||
const MAX_UNICODE_NAME: usize = 88;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
//! loosely based on the token definitions found in the [CPython source].
|
||||
//!
|
||||
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
|
||||
use crate::text_size::TextSize;
|
||||
use crate::{text_size::TextSize, Mode};
|
||||
use num_bigint::BigInt;
|
||||
use std::fmt;
|
||||
|
||||
|
@ -196,6 +196,16 @@ pub enum Tok {
|
|||
StartExpression,
|
||||
}
|
||||
|
||||
impl Tok {
|
||||
pub fn start_marker(mode: Mode) -> Self {
|
||||
match mode {
|
||||
Mode::Module => Tok::StartModule,
|
||||
Mode::Interactive => Tok::StartInteractive,
|
||||
Mode::Expression => Tok::StartExpression,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Tok {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use Tok::*;
|
||||
|
@ -404,10 +414,11 @@ impl StringKind {
|
|||
/// Returns the number of characters in the prefix.
|
||||
pub fn prefix_len(&self) -> TextSize {
|
||||
use StringKind::*;
|
||||
match self {
|
||||
String => TextSize::from(0),
|
||||
RawString | FString | Unicode | Bytes => TextSize::from(1),
|
||||
RawFString | RawBytes => TextSize::from(2),
|
||||
}
|
||||
let len = match self {
|
||||
String => 0,
|
||||
RawString | FString | Unicode | Bytes => 1,
|
||||
RawFString | RawBytes => 2,
|
||||
};
|
||||
len.into()
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue