numerous refactoring

- Split parser core and compiler core. Fix #14
- AST int type to `u32`
- Updated asdl_rs.py and update_asdl.sh fix #6
- Use `ruff_python_ast::SourceLocation` for Python source location. Deleted our own Location.
- Renamed ast::Located to ast::Attributed to distinguish terms for TextSize and SourceLocation
- `ast::<Node>`s for TextSize located ast. `ast::located::<Node>` for Python source located ast.
- Also strictly renamed things so that `located` refers only to interfaces related to Python source locations.
- `SourceLocator` to convert locations.
- New `source-code` feature to disable Python source locations when they are unnecessary.
- Also fully merges https://github.com/astral-sh/RustPython/pull/4, closes #9
This commit is contained in:
Jeong YunWon 2023-05-10 02:36:52 +09:00
parent 09a6afdd04
commit a3d9d8cb14
29 changed files with 9737 additions and 12000 deletions

View file

@ -16,11 +16,12 @@ members = [
] ]
[workspace.dependencies] [workspace.dependencies]
rustpython-ast = { path = "ast", version = "0.2.0" }
rustpython-parser-core = { path = "core", version = "0.2.0" }
rustpython-literal = { path = "literal", version = "0.2.0" }
ahash = "0.7.6" ahash = "0.7.6"
anyhow = "1.0.45" anyhow = "1.0.45"
ascii = "1.0"
bitflags = "1.3.2"
bstr = "0.2.17"
cfg-if = "1.0" cfg-if = "1.0"
insta = "1.14.0" insta = "1.14.0"
itertools = "0.10.3" itertools = "0.10.3"
@ -32,7 +33,7 @@ rand = "0.8.5"
serde = "1.0" serde = "1.0"
static_assertions = "1.1" static_assertions = "1.1"
unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" }
ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "583df5c1fa43b2732896219f8ab425116c140c80" } ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "088958e8fda2f74f1ebf315c75db13c232409b13" }
# ruff_python_ast = { path = "../ruff/crates/ruff_python_ast" } # ruff_python_ast = { path = "../ruff/crates/ruff_python_ast" }
[profile.dev.package."*"] [profile.dev.package."*"]

View file

@ -8,14 +8,14 @@ repository = "https://github.com/RustPython/RustPython"
license = "MIT" license = "MIT"
[features] [features]
default = ["constant-optimization", "fold", "location"] default = ["constant-optimization", "fold", "source-code"]
constant-optimization = ["fold"] constant-optimization = ["fold"]
location = [] source-code = ["fold"]
fold = [] fold = []
unparse = ["rustpython-literal"] unparse = ["rustpython-literal"]
[dependencies] [dependencies]
rustpython-compiler-core = { path = "../core", version = "0.2.0" } rustpython-parser-core = { workspace = true }
rustpython-literal = { path = "../literal", version = "0.2.0", optional = true } rustpython-literal = { workspace = true, optional = true }
num-bigint = { workspace = true } num-bigint = { workspace = true }

View file

@ -8,7 +8,6 @@ import textwrap
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
from typing import Optional, Dict from typing import Optional, Dict
from attr import dataclass
import asdl import asdl
@ -18,7 +17,7 @@ AUTOGEN_MESSAGE = "// File automatically generated by {}.\n"
builtin_type_mapping = { builtin_type_mapping = {
"identifier": "Ident", "identifier": "Ident",
"string": "String", "string": "String",
"int": "usize", "int": "u32",
"constant": "Constant", "constant": "Constant",
} }
assert builtin_type_mapping.keys() == asdl.builtin_types assert builtin_type_mapping.keys() == asdl.builtin_types
@ -391,7 +390,18 @@ class FoldTraitDefVisitor(EmitVisitor):
depth + 1, depth + 1,
) )
self.emit( self.emit(
"fn map_located<T>(&mut self, located: Attributed<T, U>) -> Result<Attributed<T, Self::TargetU>, Self::Error> { let custom = self.map_user(located.custom)?; Ok(Attributed { range: located.range, custom, node: located.node }) }", """
fn map_located<T>(&mut self, located: Attributed<T, U>) -> Result<Attributed<T, Self::TargetU>, Self::Error> {
let custom = self.map_user(located.custom)?;
Ok(Attributed { range: located.range, custom, node: located.node })
}""",
depth + 1,
)
self.emit(
"""
fn fold<X: Foldable<U, Self::TargetU>>(&mut self, node: X) -> Result<X::Mapped, Self::Error> {
node.fold(self)
}""",
depth + 1, depth + 1,
) )
for dfn in mod.dfns: for dfn in mod.dfns:
@ -715,8 +725,8 @@ class TraitImplVisitor(EmitVisitor):
return ",".join(rust_field(f.name) for f in fields) return ",".join(rust_field(f.name) for f in fields)
def gen_sum_fromobj(self, sum, sumname, enumname, rustname, depth): def gen_sum_fromobj(self, sum, sumname, enumname, rustname, depth):
if sum.attributes: # if sum.attributes:
self.extract_location(sumname, depth) # self.extract_location(sumname, depth)
self.emit("let _cls = _object.class();", depth) self.emit("let _cls = _object.class();", depth)
self.emit("Ok(", depth) self.emit("Ok(", depth)
@ -739,8 +749,8 @@ class TraitImplVisitor(EmitVisitor):
self.emit("})", depth) self.emit("})", depth)
def gen_product_fromobj(self, product, prodname, structname, depth): def gen_product_fromobj(self, product, prodname, structname, depth):
if product.attributes: # if product.attributes:
self.extract_location(prodname, depth) # self.extract_location(prodname, depth)
self.emit("Ok(", depth) self.emit("Ok(", depth)
self.gen_construction(structname, product, prodname, depth + 1) self.gen_construction(structname, product, prodname, depth + 1)
@ -761,11 +771,15 @@ class TraitImplVisitor(EmitVisitor):
def extract_location(self, typename, depth): def extract_location(self, typename, depth):
row = self.decode_field(asdl.Field("int", "lineno"), typename) row = self.decode_field(asdl.Field("int", "lineno"), typename)
column = self.decode_field(asdl.Field("int", "col_offset"), typename) column = self.decode_field(asdl.Field("int", "col_offset"), typename)
self.emit(f"""let _location = {{ self.emit(
let row = try_location_field({row}, _vm)?; f"""
let column = try_location_field({column}, _vm)?; let _location = {{
SourceLocation {{ row, column }} let row = {row};
}};""", depth) let column = {column};
try_location(row, column)
}};""",
depth,
)
def decode_field(self, field, typename): def decode_field(self, field, typename):
name = json.dumps(field.name) name = json.dumps(field.name)
@ -805,7 +819,7 @@ def write_located_def(typeinfo, f):
f.write( f.write(
textwrap.dedent( textwrap.dedent(
""" """
use crate::location::SourceRange; use rustpython_parser_core::source_code::SourceRange;
pub type Located<T> = super::generic::Attributed<T, SourceRange>; pub type Located<T> = super::generic::Attributed<T, SourceRange>;
""" """

View file

@ -1,5 +1,7 @@
use crate::location::{SourceLocation, SourceRange}; use rustpython_parser_core::{
use rustpython_compiler_core::text_size::{TextRange, TextSize}; source_code::{SourceLocation, SourceRange},
text_size::{TextRange, TextSize},
};
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct Attributed<T, U = ()> { pub struct Attributed<T, U = ()> {

View file

@ -1,5 +1,4 @@
use num_bigint::BigInt; use num_bigint::BigInt;
pub use rustpython_compiler_core::ConversionFlag;
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Constant { pub enum Constant {
@ -137,7 +136,7 @@ impl<U> crate::fold::Fold<U> for ConstantOptimizer {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use rustpython_compiler_core::text_size::TextRange; use rustpython_parser_core::text_size::TextRange;
#[cfg(feature = "constant-optimization")] #[cfg(feature = "constant-optimization")]
#[test] #[test]

View file

@ -62,4 +62,4 @@ macro_rules! simple_fold {
}; };
} }
simple_fold!(usize, String, bool, constant::Constant); simple_fold!(u32, String, bool, constant::Constant);

View file

@ -158,7 +158,7 @@ pub struct StmtAnnAssign<U = ()> {
pub target: Box<Expr<U>>, pub target: Box<Expr<U>>,
pub annotation: Box<Expr<U>>, pub annotation: Box<Expr<U>>,
pub value: Option<Box<Expr<U>>>, pub value: Option<Box<Expr<U>>>,
pub simple: usize, pub simple: u32,
} }
impl<U> From<StmtAnnAssign<U>> for StmtKind<U> { impl<U> From<StmtAnnAssign<U>> for StmtKind<U> {
@ -328,7 +328,7 @@ impl<U> From<StmtImport<U>> for StmtKind<U> {
pub struct StmtImportFrom<U = ()> { pub struct StmtImportFrom<U = ()> {
pub module: Option<Ident>, pub module: Option<Ident>,
pub names: Vec<Alias<U>>, pub names: Vec<Alias<U>>,
pub level: Option<usize>, pub level: Option<u32>,
} }
impl<U> From<StmtImportFrom<U>> for StmtKind<U> { impl<U> From<StmtImportFrom<U>> for StmtKind<U> {
@ -610,7 +610,7 @@ impl<U> From<ExprCall<U>> for ExprKind<U> {
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct ExprFormattedValue<U = ()> { pub struct ExprFormattedValue<U = ()> {
pub value: Box<Expr<U>>, pub value: Box<Expr<U>>,
pub conversion: usize, pub conversion: u32,
pub format_spec: Option<Box<Expr<U>>>, pub format_spec: Option<Box<Expr<U>>>,
} }
@ -819,7 +819,7 @@ pub struct Comprehension<U = ()> {
pub target: Expr<U>, pub target: Expr<U>,
pub iter: Expr<U>, pub iter: Expr<U>,
pub ifs: Vec<Expr<U>>, pub ifs: Vec<Expr<U>>,
pub is_async: usize, pub is_async: u32,
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
@ -996,7 +996,7 @@ pub type Pattern<U = ()> = Attributed<PatternKind<U>, U>;
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub struct TypeIgnoreTypeIgnore { pub struct TypeIgnoreTypeIgnore {
pub lineno: usize, pub lineno: u32,
pub tag: String, pub tag: String,
} }
@ -1019,6 +1019,7 @@ pub mod fold {
type TargetU; type TargetU;
type Error; type Error;
fn map_user(&mut self, user: U) -> Result<Self::TargetU, Self::Error>; fn map_user(&mut self, user: U) -> Result<Self::TargetU, Self::Error>;
fn map_located<T>( fn map_located<T>(
&mut self, &mut self,
located: Attributed<T, U>, located: Attributed<T, U>,
@ -1030,6 +1031,13 @@ pub mod fold {
node: located.node, node: located.node,
}) })
} }
/// Folds any `Foldable` node with this folder by delegating to the node's own `fold` method.
fn fold<X: Foldable<U, Self::TargetU>>(
&mut self,
node: X,
) -> Result<X::Mapped, Self::Error> {
node.fold(self)
}
fn fold_mod(&mut self, node: Mod<U>) -> Result<Mod<Self::TargetU>, Self::Error> { fn fold_mod(&mut self, node: Mod<U>) -> Result<Mod<Self::TargetU>, Self::Error> {
fold_mod(self, node) fold_mod(self, node)
} }

View file

@ -1,6 +1,6 @@
// File automatically generated by ast/asdl_rs.py. // File automatically generated by ast/asdl_rs.py.
use crate::location::SourceRange; use rustpython_parser_core::source_code::SourceRange;
pub type Located<T> = super::generic::Attributed<T, SourceRange>; pub type Located<T> = super::generic::Attributed<T, SourceRange>;
pub type Mod = super::generic::Mod<SourceRange>; pub type Mod = super::generic::Mod<SourceRange>;

View file

@ -7,53 +7,21 @@ mod generic {
include!("gen/generic.rs"); include!("gen/generic.rs");
} }
mod impls; mod impls;
#[cfg(feature = "location")] #[cfg(feature = "source-code")]
pub mod located { mod source_locator;
include!("gen/located.rs");
}
#[cfg(feature = "location")]
mod locator;
#[cfg(feature = "location")]
pub use crate::locator::locate;
#[cfg(feature = "location")]
pub use rustpython_compiler_core::SourceLocator;
#[cfg(feature = "unparse")] #[cfg(feature = "unparse")]
mod unparse; mod unparse;
pub use attributed::Attributed; pub use attributed::Attributed;
pub use constant::{Constant, ConversionFlag}; pub use constant::Constant;
pub use generic::*; pub use generic::*;
pub use rustpython_parser_core::{text_size, ConversionFlag};
pub type Suite<U = ()> = Vec<Stmt<U>>; pub type Suite<U = ()> = Vec<Stmt<U>>;
pub mod location { #[cfg(feature = "source-code")]
pub use rustpython_compiler_core::source_code::{OneIndexed, SourceLocation}; pub mod located {
include!("gen/located.rs");
#[derive(Debug)]
pub struct SourceRange {
pub start: SourceLocation,
pub end: Option<SourceLocation>,
}
impl SourceRange {
pub fn new(start: SourceLocation, end: SourceLocation) -> Self {
Self {
start,
end: Some(end),
}
}
pub fn unwrap_end(&self) -> SourceLocation {
self.end.unwrap()
}
}
impl From<std::ops::Range<SourceLocation>> for SourceRange {
fn from(value: std::ops::Range<SourceLocation>) -> Self {
Self {
start: value.start,
end: Some(value.end),
}
}
}
} }
pub use rustpython_parser_core::source_code;

View file

@ -1,11 +1,5 @@
use crate::attributed::Attributed; use crate::attributed::Attributed;
use crate::fold_helpers::Foldable; use rustpython_parser_core::source_code::{SourceLocator, SourceRange};
use crate::location::SourceRange;
use rustpython_compiler_core::SourceLocator;
pub fn locate<X: Foldable<(), SourceRange>>(locator: &mut SourceLocator, ast: X) -> X::Mapped {
ast.fold(locator).unwrap()
}
impl crate::fold::Fold<()> for SourceLocator<'_> { impl crate::fold::Fold<()> for SourceLocator<'_> {
type TargetU = SourceRange; type TargetU = SourceRange;

View file

@ -1,7 +1,5 @@
use crate::{ use crate::ConversionFlag;
Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Expr, ExprKind, use crate::{Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Expr, ExprKind, Operator};
Operator,
};
use std::fmt; use std::fmt;
mod precedence { mod precedence {
@ -452,7 +450,7 @@ impl<'a> Unparser<'a> {
fn unparse_formatted<U>( fn unparse_formatted<U>(
&mut self, &mut self,
val: &Expr<U>, val: &Expr<U>,
conversion: usize, conversion: u32,
spec: Option<&Expr<U>>, spec: Option<&Expr<U>>,
) -> fmt::Result { ) -> fmt::Result {
let buffered = to_string_fmt(|f| Unparser::new(f).unparse_expr(val, precedence::TEST + 1)); let buffered = to_string_fmt(|f| Unparser::new(f).unparse_expr(val, precedence::TEST + 1));
@ -466,7 +464,7 @@ impl<'a> Unparser<'a> {
self.p(&buffered)?; self.p(&buffered)?;
drop(buffered); drop(buffered);
if conversion != ConversionFlag::None as usize { if conversion != ConversionFlag::None as u32 {
self.p("!")?; self.p("!")?;
let buf = &[conversion as u8]; let buf = &[conversion as u8];
let c = std::str::from_utf8(buf).unwrap(); let c = std::str::from_utf8(buf).unwrap();

View file

@ -1,6 +1,6 @@
[package] [package]
name = "rustpython-compiler-core" name = "rustpython-parser-core"
description = "RustPython specific bytecode." description = "RustPython parser data types."
version = "0.2.0" version = "0.2.0"
authors = ["RustPython Team"] authors = ["RustPython Team"]
edition = "2021" edition = "2021"
@ -8,7 +8,6 @@ repository = "https://github.com/RustPython/RustPython"
license = "MIT" license = "MIT"
[dependencies] [dependencies]
bitflags = { workspace = true }
itertools = { workspace = true } itertools = { workspace = true }
num-bigint = { workspace = true } num-bigint = { workspace = true }
num-complex = { workspace = true } num-complex = { workspace = true }
@ -18,3 +17,6 @@ ruff_python_ast = { workspace = true }
lz4_flex = "0.9.2" lz4_flex = "0.9.2"
[features]
default = ["source-code"]
source-code = []

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
use crate::{source_code::SourceLocation, text_size::TextSize}; use crate::text_size::TextSize;
use std::fmt::Display; use std::fmt::Display;
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -61,79 +61,4 @@ impl<T> BaseError<T> {
{ {
BaseError::from(self) BaseError::from(self)
} }
pub fn into_located<U>(self, locator: &mut super::SourceLocator) -> LocatedError<U>
where
T: Into<U>,
{
let location = locator.locate(self.offset);
LocatedError {
error: self.error.into(),
location: Some(location),
source_path: self.source_path,
}
}
}
/// An error value together with an optional source location and a source path.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedError<T> {
/// The wrapped error value.
pub error: T,
/// Where the error occurred, if known.
pub location: Option<SourceLocation>,
/// Path string carried along with the error.
/// NOTE(review): presumably the path of the source file being processed — confirm.
pub source_path: String,
}
impl<T> LocatedError<T> {
pub fn error(self) -> T {
self.error
}
pub fn from<U>(obj: LocatedError<U>) -> Self
where
U: Into<T>,
{
Self {
error: obj.error.into(),
location: obj.location,
source_path: obj.source_path,
}
}
pub fn into<U>(self) -> LocatedError<U>
where
T: Into<U>,
{
LocatedError::from(self)
}
pub fn python_location(&self) -> (usize, usize) {
if let Some(location) = self.location {
(
location.row.to_one_indexed(),
location.column.to_one_indexed(),
)
} else {
(0, 0)
}
}
}
/// Formats as `"<error> at row <row> col <col>"`, using one-indexed numbers and
/// `0`/`0` when no location is attached.
impl<T> Display for LocatedError<T>
where
    T: std::fmt::Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let (row, column) = match self.location {
            Some(l) => (l.row.to_one_indexed(), l.column.to_one_indexed()),
            None => (0, 0),
        };
        write!(f, "{} at row {} col {}", &self.error, row, column)
    }
}
impl<T> std::error::Error for LocatedError<T>
where
T: std::error::Error + 'static,
{
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
Some(&self.error)
}
} }

13
core/src/format.rs Normal file
View file

@ -0,0 +1,13 @@
/// Transforms a value prior to formatting it.
///
/// Every variant except `None` uses the ASCII code of its conversion character
/// as its discriminant.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum ConversionFlag {
/// No conversion
None = 0, // CPython uses -1 here; 0 is used instead because -1 does not fit the `u8` representation
/// Converts by calling `str(<value>)`.
Str = b's',
/// Converts by calling `ascii(<value>)`.
Ascii = b'a',
/// Converts by calling `repr(<value>)`.
Repr = b'r',
}

View file

@ -1,41 +1,15 @@
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")] #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")]
#![doc(html_root_url = "https://docs.rs/rustpython-compiler-core/")] #![doc(html_root_url = "https://docs.rs/rustpython-parser-core/")]
// parser core
mod error; mod error;
mod mode; mod format;
pub mod mode;
#[cfg(feature = "source-code")]
pub mod source_code;
pub use error::BaseError; pub use error::BaseError;
pub use format::ConversionFlag;
pub use mode::Mode; pub use mode::Mode;
pub use ruff_text_size as text_size; // re-export mandatory and frequently accessed dependency
// compiler core // re-export our public interface
mod bytecode; pub use ruff_text_size as text_size;
pub mod marshal;
pub use bytecode::*;
pub use error::LocatedError;
pub use ruff_python_ast::source_code;
pub use ruff_python_ast::source_code::OneIndexed as LineNumber;
use source_code::{LineIndex, SourceCode, SourceLocation};
use text_size::TextSize;
/// Converts source code byte-offset to Python convention line and column numbers.
///
/// Holds the source text together with a line index that `new` builds from it.
pub struct SourceLocator<'a> {
/// The source text that offsets refer to.
pub source: &'a str,
/// Line index computed from `source` by `LineIndex::from_source_text`.
index: LineIndex,
}
impl<'a> SourceLocator<'a> {
/// Creates a locator for `source`, building its line index up front.
#[inline]
pub fn new(source: &'a str) -> Self {
let index = LineIndex::from_source_text(source);
Self { source, index }
}
/// Resolves a byte `offset` into a `SourceLocation` using the stored line index.
pub fn locate(&mut self, offset: TextSize) -> SourceLocation {
let code = SourceCode::new(self.source, &self.index);
// NOTE(review): this transmute is only sound if the `TextSize` passed in and the
// offset type expected by `SourceCode::source_location` have identical layout
// (presumably the same type coming from two `text_size` crate versions) — confirm.
let offset = unsafe { std::mem::transmute(offset) }; // temp code to fix text_size dependency
code.source_location(offset)
}
}

View file

@ -1,635 +0,0 @@
use core::fmt;
use std::convert::Infallible;
use num_bigint::{BigInt, Sign};
use num_complex::Complex64;
use crate::{
bytecode::*,
source_code::{OneIndexed, SourceLocation},
};
/// Version number of the marshal format.
/// NOTE(review): presumably bumped whenever the serialized layout changes incompatibly — confirm.
pub const FORMAT_VERSION: u32 = 4;
/// Errors that can occur while reading marshaled data.
#[derive(Debug)]
pub enum MarshalError {
/// Unexpected End Of File
Eof,
/// Invalid Bytecode
InvalidBytecode,
/// Invalid utf8 in string
InvalidUtf8,
/// Invalid source location
InvalidLocation,
/// Bad type marker
BadType,
}
impl fmt::Display for MarshalError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Eof => f.write_str("unexpected end of data"),
Self::InvalidBytecode => f.write_str("invalid bytecode"),
Self::InvalidUtf8 => f.write_str("invalid utf8"),
Self::InvalidLocation => f.write_str("invalid source location"),
Self::BadType => f.write_str("bad type marker"),
}
}
}
impl From<std::str::Utf8Error> for MarshalError {
fn from(_: std::str::Utf8Error) -> Self {
Self::InvalidUtf8
}
}
// `MarshalError` wraps no underlying error, so the default `Error` methods are sufficient.
impl std::error::Error for MarshalError {}
/// Module-local `Result` whose error type defaults to [`MarshalError`].
type Result<T, E = MarshalError> = std::result::Result<T, E>;
/// One-byte type markers that prefix each marshaled value.
///
/// Each discriminant is the ASCII byte written to / read from the stream.
/// Commented-out entries are markers that this enum does not define.
#[repr(u8)]
enum Type {
// Null = b'0',
None = b'N',
False = b'F',
True = b'T',
StopIter = b'S',
Ellipsis = b'.',
Int = b'i',
Float = b'g',
Complex = b'y',
// Long = b'l', // i32
Bytes = b's', // = TYPE_STRING
// Interned = b't',
// Ref = b'r',
Tuple = b'(',
List = b'[',
Dict = b'{',
Code = b'c',
Unicode = b'u',
// Unknown = b'?',
Set = b'<',
FrozenSet = b'>',
Ascii = b'a',
// AsciiInterned = b'A',
// SmallTuple = b')',
// ShortAscii = b'z',
// ShortAsciiInterned = b'Z',
}
// const FLAG_REF: u8 = b'\x80';
impl TryFrom<u8> for Type {
type Error = MarshalError;
fn try_from(value: u8) -> Result<Self> {
use Type::*;
Ok(match value {
// b'0' => Null,
b'N' => None,
b'F' => False,
b'T' => True,
b'S' => StopIter,
b'.' => Ellipsis,
b'i' => Int,
b'g' => Float,
b'y' => Complex,
// b'l' => Long,
b's' => Bytes,
// b't' => Interned,
// b'r' => Ref,
b'(' => Tuple,
b'[' => List,
b'{' => Dict,
b'c' => Code,
b'u' => Unicode,
// b'?' => Unknown,
b'<' => Set,
b'>' => FrozenSet,
b'a' => Ascii,
// b'A' => AsciiInterned,
// b')' => SmallTuple,
// b'z' => ShortAscii,
// b'Z' => ShortAsciiInterned,
_ => return Err(MarshalError::BadType),
})
}
}
pub trait Read {
fn read_slice(&mut self, n: u32) -> Result<&[u8]>;
fn read_array<const N: usize>(&mut self) -> Result<&[u8; N]> {
self.read_slice(N as u32).map(|s| s.try_into().unwrap())
}
fn read_str(&mut self, len: u32) -> Result<&str> {
Ok(std::str::from_utf8(self.read_slice(len)?)?)
}
fn read_u8(&mut self) -> Result<u8> {
Ok(u8::from_le_bytes(*self.read_array()?))
}
fn read_u16(&mut self) -> Result<u16> {
Ok(u16::from_le_bytes(*self.read_array()?))
}
fn read_u32(&mut self) -> Result<u32> {
Ok(u32::from_le_bytes(*self.read_array()?))
}
fn read_u64(&mut self) -> Result<u64> {
Ok(u64::from_le_bytes(*self.read_array()?))
}
}
pub(crate) trait ReadBorrowed<'a>: Read {
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
}
}
impl Read for &[u8] {
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
self.read_slice_borrow(n)
}
}
/// Reading from a byte slice splits off the requested prefix and shrinks the
/// slice to the remainder; a failed read leaves the slice untouched.
impl<'a> ReadBorrowed<'a> for &'a [u8] {
    fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
        let whole: &'a [u8] = *self;
        let count = n as usize;
        if count > whole.len() {
            return Err(MarshalError::Eof);
        }
        let (taken, rest) = whole.split_at(count);
        *self = rest;
        Ok(taken)
    }
}
/// A reader over an owned or borrowed byte buffer that tracks how far it has read.
pub struct Cursor<B> {
/// The underlying buffer.
pub data: B,
/// Index into `data` of the next byte to read.
pub position: usize,
}
/// Reads from the cursor's buffer starting at `position`.
impl<B: AsRef<[u8]>> Read for Cursor<B> {
    /// Returns the next `n` bytes and advances `position` past them.
    ///
    /// Returns [`MarshalError::Eof`] when fewer than `n` bytes remain. Because
    /// `position` is a public field it may point past the end of the buffer;
    /// that case is reported as `Eof` as well instead of panicking. The
    /// position is left unchanged on failure.
    fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
        let len = n as usize;
        let slice = self
            .data
            .as_ref()
            // `get` instead of indexing: an out-of-range `position` must not panic.
            .get(self.position..)
            .and_then(|rest| rest.get(..len))
            .ok_or(MarshalError::Eof)?;
        self.position += len;
        Ok(slice)
    }
}
/// Deserializes a [`CodeObject`] from `rdr`, creating names and constants through `bag`.
///
/// Fields are read in a fixed order: instructions, locations, flags, argument
/// counts, source path, first line number, maximum stack depth, object name,
/// `cell2arg`, constants, and finally the four name tables (names, varnames,
/// cellvars, freevars). Every variable-length field is prefixed by a `u32` length.
///
/// # Errors
///
/// Propagates any error from the reader, from decoding an instruction byte, or
/// from deserializing a constant. Returns [`MarshalError::InvalidLocation`] when
/// a row, column or first line number is rejected by `OneIndexed::new`, and
/// [`MarshalError::InvalidBytecode`] when the instruction count is so large that
/// its byte length does not fit in a `u32`.
pub fn deserialize_code<R: Read, Bag: ConstantBag>(
    rdr: &mut R,
    bag: Bag,
) -> Result<CodeObject<Bag::Constant>> {
    let len = rdr.read_u32()?;
    // Each instruction occupies two bytes (opcode + argument). The count comes
    // from untrusted data, so guard the multiplication against `u32` overflow
    // instead of panicking (debug) or silently wrapping (release).
    let byte_len = len.checked_mul(2).ok_or(MarshalError::InvalidBytecode)?;
    let instructions = rdr.read_slice(byte_len)?;
    let instructions = instructions
        .chunks_exact(2)
        .map(|cu| {
            let op = Instruction::try_from(cu[0])?;
            let arg = OpArgByte(cu[1]);
            Ok(CodeUnit { op, arg })
        })
        .collect::<Result<Box<[CodeUnit]>>>()?;

    let len = rdr.read_u32()?;
    let locations = (0..len)
        .map(|_| {
            Ok(SourceLocation {
                row: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
                column: OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?,
            })
        })
        .collect::<Result<Box<[SourceLocation]>>>()?;

    // Unknown flag bits are dropped rather than rejected.
    let flags = CodeFlags::from_bits_truncate(rdr.read_u16()?);

    let posonlyarg_count = rdr.read_u32()?;
    let arg_count = rdr.read_u32()?;
    let kwonlyarg_count = rdr.read_u32()?;

    let len = rdr.read_u32()?;
    let source_path = bag.make_name(rdr.read_str(len)?);

    let first_line_number =
        OneIndexed::new(rdr.read_u32()?).ok_or(MarshalError::InvalidLocation)?;
    let max_stackdepth = rdr.read_u32()?;

    let len = rdr.read_u32()?;
    let obj_name = bag.make_name(rdr.read_str(len)?);

    // A zero length encodes "no cell2arg table" (`None`).
    let len = rdr.read_u32()?;
    let cell2arg = (len != 0)
        .then(|| {
            (0..len)
                .map(|_| Ok(rdr.read_u32()? as i32))
                .collect::<Result<Box<[i32]>>>()
        })
        .transpose()?;

    let len = rdr.read_u32()?;
    let constants = (0..len)
        .map(|_| deserialize_value(rdr, bag))
        .collect::<Result<Box<[_]>>>()?;

    // Reads one length-prefixed table of length-prefixed names.
    let mut read_names = || {
        let len = rdr.read_u32()?;
        (0..len)
            .map(|_| {
                let len = rdr.read_u32()?;
                Ok(bag.make_name(rdr.read_str(len)?))
            })
            .collect::<Result<Box<[_]>>>()
    };

    let names = read_names()?;
    let varnames = read_names()?;
    let cellvars = read_names()?;
    let freevars = read_names()?;

    Ok(CodeObject {
        instructions,
        locations,
        flags,
        posonlyarg_count,
        arg_count,
        kwonlyarg_count,
        source_path,
        first_line_number,
        max_stackdepth,
        obj_name,
        cell2arg,
        constants,
        names,
        varnames,
        cellvars,
        freevars,
    })
}
/// Factory used by `deserialize_value` to build values of a caller-chosen type.
///
/// The infallible constructors cover the value kinds every bag must support;
/// the `Result`-returning ones may be refused by a bag.
pub trait MarshalBag: Copy {
/// The value type produced by this bag.
type Value;
fn make_bool(&self, value: bool) -> Self::Value;
fn make_none(&self) -> Self::Value;
fn make_ellipsis(&self) -> Self::Value;
fn make_float(&self, value: f64) -> Self::Value;
fn make_complex(&self, value: Complex64) -> Self::Value;
fn make_str(&self, value: &str) -> Self::Value;
fn make_bytes(&self, value: &[u8]) -> Self::Value;
fn make_int(&self, value: BigInt) -> Self::Value;
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value;
/// Wraps a deserialized code object whose constants come from `Self::ConstantBag`.
fn make_code(
&self,
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
) -> Self::Value;
fn make_stop_iter(&self) -> Result<Self::Value>;
fn make_list(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
fn make_set(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
fn make_frozenset(&self, it: impl Iterator<Item = Self::Value>) -> Result<Self::Value>;
fn make_dict(
&self,
it: impl Iterator<Item = (Self::Value, Self::Value)>,
) -> Result<Self::Value>;
/// The bag used for the constants inside nested code objects.
type ConstantBag: ConstantBag;
/// Converts this bag into the bag used when deserializing a code object.
fn constant_bag(self) -> Self::ConstantBag;
}
/// Every `ConstantBag` is also a `MarshalBag` whose values are its constants.
///
/// Stop-iteration, list, set, frozenset and dict values are rejected with
/// `MarshalError::BadType`.
impl<Bag: ConstantBag> MarshalBag for Bag {
type Value = Bag::Constant;
fn make_bool(&self, value: bool) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Boolean { value })
}
fn make_none(&self) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::None)
}
fn make_ellipsis(&self) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Ellipsis)
}
fn make_float(&self, value: f64) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Float { value })
}
fn make_complex(&self, value: Complex64) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Complex { value })
}
fn make_str(&self, value: &str) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Str { value })
}
fn make_bytes(&self, value: &[u8]) -> Self::Value {
self.make_constant::<Bag::Constant>(BorrowedConstant::Bytes { value })
}
// NOTE(review): the three same-named calls below are presumably meant to resolve to the
// `ConstantBag` methods (via the `Bag: ConstantBag` bound) rather than recursing into
// this impl — confirm before restructuring.
fn make_int(&self, value: BigInt) -> Self::Value {
self.make_int(value)
}
fn make_tuple(&self, elements: impl Iterator<Item = Self::Value>) -> Self::Value {
self.make_tuple(elements)
}
fn make_code(
&self,
code: CodeObject<<Self::ConstantBag as ConstantBag>::Constant>,
) -> Self::Value {
self.make_code(code)
}
// The remaining kinds are refused by this blanket impl.
fn make_stop_iter(&self) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_list(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_set(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_frozenset(&self, _: impl Iterator<Item = Self::Value>) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
fn make_dict(
&self,
_: impl Iterator<Item = (Self::Value, Self::Value)>,
) -> Result<Self::Value> {
Err(MarshalError::BadType)
}
type ConstantBag = Self;
fn constant_bag(self) -> Self::ConstantBag {
self
}
}
/// Deserializes one value from `rdr`, constructing it through `bag`.
///
/// The first byte is the type marker; the rest of the encoding depends on it.
/// Containers are deserialized recursively, element by element.
///
/// Returns `MarshalError::BadType` for an unknown marker, and propagates any error
/// from the reader or from the bag's fallible constructors.
pub fn deserialize_value<R: Read, Bag: MarshalBag>(rdr: &mut R, bag: Bag) -> Result<Bag::Value> {
let typ = Type::try_from(rdr.read_u8()?)?;
let value = match typ {
Type::True => bag.make_bool(true),
Type::False => bag.make_bool(false),
Type::None => bag.make_none(),
Type::StopIter => bag.make_stop_iter()?,
Type::Ellipsis => bag.make_ellipsis(),
Type::Int => {
// The length is stored as a signed 32-bit value: its sign is the integer's sign,
// its magnitude is the number of little-endian magnitude bytes that follow.
let len = rdr.read_u32()? as i32;
let sign = if len < 0 { Sign::Minus } else { Sign::Plus };
let bytes = rdr.read_slice(len.unsigned_abs())?;
let int = BigInt::from_bytes_le(sign, bytes);
bag.make_int(int)
}
Type::Float => {
let value = f64::from_bits(rdr.read_u64()?);
bag.make_float(value)
}
Type::Complex => {
// Real part first, then imaginary part, each as raw `f64` bits.
let re = f64::from_bits(rdr.read_u64()?);
let im = f64::from_bits(rdr.read_u64()?);
let value = Complex64 { re, im };
bag.make_complex(value)
}
Type::Ascii | Type::Unicode => {
let len = rdr.read_u32()?;
let value = rdr.read_str(len)?;
bag.make_str(value)
}
Type::Tuple => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_tuple(it))?
}
// For the fallible containers below, the first `?` handles an element that failed to
// deserialize and the second `?` handles the bag refusing to build the container.
Type::List => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_list(it))??
}
Type::Set => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_set(it))??
}
Type::FrozenSet => {
let len = rdr.read_u32()?;
let it = (0..len).map(|_| deserialize_value(rdr, bag));
itertools::process_results(it, |it| bag.make_frozenset(it))??
}
Type::Dict => {
// `len` counts entries; each entry is a key followed by its value.
let len = rdr.read_u32()?;
let it = (0..len).map(|_| {
let k = deserialize_value(rdr, bag)?;
let v = deserialize_value(rdr, bag)?;
Ok::<_, MarshalError>((k, v))
});
itertools::process_results(it, |it| bag.make_dict(it))??
}
Type::Bytes => {
// Following CPython, after marshaling, byte arrays are converted into bytes.
let len = rdr.read_u32()?;
let value = rdr.read_slice(len)?;
bag.make_bytes(value)
}
Type::Code => bag.make_code(deserialize_code(rdr, bag.constant_bag())?),
};
Ok(value)
}
/// A value that can expose itself as a [`DumpableValue`] for serialization.
pub trait Dumpable: Sized {
/// Error that `with_dump` may return.
type Error;
/// Constant type used by code objects appearing inside the dumped value.
type Constant: Constant;
/// Calls `f` with a borrowed view of this value and returns its result.
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error>;
}
/// A borrowed view of a value that `serialize_value` knows how to write.
///
/// Container variants hold slices of the dumpable type `D` itself.
pub enum DumpableValue<'a, D: Dumpable> {
Integer(&'a BigInt),
Float(f64),
Complex(Complex64),
Boolean(bool),
Str(&'a str),
Bytes(&'a [u8]),
Code(&'a CodeObject<D::Constant>),
Tuple(&'a [D]),
None,
Ellipsis,
StopIter,
List(&'a [D]),
Set(&'a [D]),
Frozenset(&'a [D]),
/// Key/value pairs.
Dict(&'a [(D, D)]),
}
impl<'a, C: Constant> From<BorrowedConstant<'a, C>> for DumpableValue<'a, C> {
fn from(c: BorrowedConstant<'a, C>) -> Self {
match c {
BorrowedConstant::Integer { value } => Self::Integer(value),
BorrowedConstant::Float { value } => Self::Float(value),
BorrowedConstant::Complex { value } => Self::Complex(value),
BorrowedConstant::Boolean { value } => Self::Boolean(value),
BorrowedConstant::Str { value } => Self::Str(value),
BorrowedConstant::Bytes { value } => Self::Bytes(value),
BorrowedConstant::Code { code } => Self::Code(code),
BorrowedConstant::Tuple { elements } => Self::Tuple(elements),
BorrowedConstant::None => Self::None,
BorrowedConstant::Ellipsis => Self::Ellipsis,
}
}
}
impl<C: Constant> Dumpable for C {
type Error = Infallible;
type Constant = Self;
#[inline(always)]
fn with_dump<R>(&self, f: impl FnOnce(DumpableValue<'_, Self>) -> R) -> Result<R, Self::Error> {
Ok(f(self.borrow_constant().into()))
}
}
/// A sink for marshaled bytes.
///
/// Implementors supply [`Write::write_slice`]; the integer writers encode
/// their argument as little-endian bytes and forward to it.
pub trait Write {
    /// Appends `slice` to the output.
    fn write_slice(&mut self, slice: &[u8]);

    /// Writes a single byte.
    fn write_u8(&mut self, v: u8) {
        let encoded: [u8; 1] = v.to_le_bytes();
        self.write_slice(&encoded)
    }

    /// Writes `v` as two little-endian bytes.
    fn write_u16(&mut self, v: u16) {
        let encoded: [u8; 2] = v.to_le_bytes();
        self.write_slice(&encoded)
    }

    /// Writes `v` as four little-endian bytes.
    fn write_u32(&mut self, v: u32) {
        let encoded: [u8; 4] = v.to_le_bytes();
        self.write_slice(&encoded)
    }

    /// Writes `v` as eight little-endian bytes.
    fn write_u64(&mut self, v: u64) {
        let encoded: [u8; 8] = v.to_le_bytes();
        self.write_slice(&encoded)
    }
}
/// A `Vec<u8>` collects everything written to it by appending.
impl Write for Vec<u8> {
    fn write_slice(&mut self, slice: &[u8]) {
        self.extend(slice)
    }
}
/// Writes `len` as a `u32` length prefix.
///
/// Panics with "too long to serialize" if `len` does not fit in a `u32`.
pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
    match u32::try_from(len) {
        Ok(encoded) => buf.write_u32(encoded),
        Err(_) => panic!("too long to serialize"),
    }
}
/// Writes `slice` preceded by its length as a `u32` prefix.
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
    let byte_count = slice.len();
    write_len(buf, byte_count);
    buf.write_slice(slice);
}
/// Serializes one value into `buf`.
///
/// Every value starts with a one-byte `Type` tag and is followed by a
/// tag-specific payload. Containers write their element count and then each
/// element recursively through `Dumpable::with_dump`.
///
/// # Errors
///
/// Returns the first error produced while dumping a nested element, either
/// from `with_dump` itself or from the recursive call.
///
/// # Panics
///
/// Panics with "too long to serialize" when a length does not fit the
/// integer width used on the wire.
pub fn serialize_value<W: Write, D: Dumpable>(
    buf: &mut W,
    constant: DumpableValue<'_, D>,
) -> Result<(), D::Error> {
    match constant {
        DumpableValue::Integer(int) => {
            buf.write_u8(Type::Int as u8);
            let (sign, magnitude) = int.to_bytes_le();
            let byte_count = i32::try_from(magnitude.len()).expect("too long to serialize");
            // The sign travels in the length field: a negative length marks a
            // negative integer.
            let signed_count = match sign {
                Sign::Minus => -byte_count,
                _ => byte_count,
            };
            buf.write_u32(signed_count as u32);
            buf.write_slice(&magnitude);
        }
        DumpableValue::Float(f) => {
            buf.write_u8(Type::Float as u8);
            let bits = f.to_bits();
            buf.write_u64(bits);
        }
        DumpableValue::Complex(c) => {
            buf.write_u8(Type::Complex as u8);
            // Real part first, imaginary part second.
            let (re_bits, im_bits) = (c.re.to_bits(), c.im.to_bits());
            buf.write_u64(re_bits);
            buf.write_u64(im_bits);
        }
        DumpableValue::Boolean(b) => {
            // Booleans have no payload; the tag itself carries the value.
            let tag = if b { Type::True } else { Type::False };
            buf.write_u8(tag as u8);
        }
        DumpableValue::Str(s) => {
            buf.write_u8(Type::Unicode as u8);
            write_vec(buf, s.as_bytes());
        }
        DumpableValue::Bytes(b) => {
            buf.write_u8(Type::Bytes as u8);
            write_vec(buf, b);
        }
        DumpableValue::Code(c) => {
            buf.write_u8(Type::Code as u8);
            serialize_code(buf, c);
        }
        DumpableValue::Tuple(items) => {
            buf.write_u8(Type::Tuple as u8);
            write_len(buf, items.len());
            for item in items {
                // `with_dump` wraps the recursive result in its own `Result`;
                // flatten the two layers before propagating.
                item.with_dump(|dumped| serialize_value(buf, dumped))
                    .and_then(|nested| nested)?;
            }
        }
        DumpableValue::None => {
            buf.write_u8(Type::None as u8);
        }
        DumpableValue::Ellipsis => {
            buf.write_u8(Type::Ellipsis as u8);
        }
        DumpableValue::StopIter => {
            buf.write_u8(Type::StopIter as u8);
        }
        DumpableValue::List(items) => {
            buf.write_u8(Type::List as u8);
            write_len(buf, items.len());
            for item in items {
                item.with_dump(|dumped| serialize_value(buf, dumped))
                    .and_then(|nested| nested)?;
            }
        }
        DumpableValue::Set(items) => {
            buf.write_u8(Type::Set as u8);
            write_len(buf, items.len());
            for item in items {
                item.with_dump(|dumped| serialize_value(buf, dumped))
                    .and_then(|nested| nested)?;
            }
        }
        DumpableValue::Frozenset(items) => {
            buf.write_u8(Type::FrozenSet as u8);
            write_len(buf, items.len());
            for item in items {
                item.with_dump(|dumped| serialize_value(buf, dumped))
                    .and_then(|nested| nested)?;
            }
        }
        DumpableValue::Dict(entries) => {
            buf.write_u8(Type::Dict as u8);
            write_len(buf, entries.len());
            // Each entry is written as its key immediately followed by its value.
            for (key, value) in entries {
                key.with_dump(|dumped| serialize_value(buf, dumped))
                    .and_then(|nested| nested)?;
                value
                    .with_dump(|dumped| serialize_value(buf, dumped))
                    .and_then(|nested| nested)?;
            }
        }
    }
    Ok(())
}
/// Serializes a code object into `buf`.
///
/// Fields are written in a fixed order: instructions, locations, flags,
/// argument counts, source path, first line number, max stack depth, object
/// name, `cell2arg`, constants, and finally the four name tables (`names`,
/// `varnames`, `cellvars`, `freevars`). Variable-length parts are prefixed
/// with their length via `write_len` / `write_vec`.
///
/// # Panics
///
/// Panics (through `write_len`) if any length does not fit in a `u32`.
pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>) {
// The prefix is the number of instructions, not the number of bytes.
write_len(buf, code.instructions.len());
// SAFETY: it's ok to transmute CodeUnit to [u8; 2]
// NOTE(review): only the middle slice from `align_to` is written; the
// prefix and suffix slices are discarded — presumably always empty for a
// byte target, confirm against the `CodeUnit` layout.
let (_, instructions_bytes, _) = unsafe { code.instructions.align_to() };
buf.write_slice(instructions_bytes);
// Locations: count, then a (row, column) pair of `u32`s per entry.
write_len(buf, code.locations.len());
for loc in &*code.locations {
// `as _` casts to the `u32` that `write_u32` expects.
buf.write_u32(loc.row.get() as _);
buf.write_u32(loc.column.get() as _);
}
buf.write_u16(code.flags.bits());
buf.write_u32(code.posonlyarg_count);
buf.write_u32(code.arg_count);
buf.write_u32(code.kwonlyarg_count);
write_vec(buf, code.source_path.as_ref().as_bytes());
buf.write_u32(code.first_line_number.get());
buf.write_u32(code.max_stackdepth);
write_vec(buf, code.obj_name.as_ref().as_bytes());
// A missing `cell2arg` is written as an empty list, so absent and empty are
// indistinguishable in the output.
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
write_len(buf, cell2arg.len());
for &i in cell2arg {
buf.write_u32(i as u32)
}
write_len(buf, code.constants.len());
for constant in &*code.constants {
// The empty `match x {}` only type-checks for an uninhabited error type,
// so this call cannot actually fail.
serialize_value(buf, constant.borrow_constant().into()).unwrap_or_else(|x| match x {})
}
// Shared writer for the name tables: count, then each name as a
// length-prefixed byte string.
let mut write_names = |names: &[C::Name]| {
write_len(buf, names.len());
for name in names {
write_vec(buf, name.as_ref().as_bytes());
}
};
write_names(&code.names);
write_names(&code.varnames);
write_names(&code.cellvars);
write_names(&code.freevars);
}

View file

@ -1,27 +1,30 @@
//! Control in the different modes by which a source file can be parsed.
/// The mode argument specifies in what way code must be parsed.
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub enum Mode { pub enum Mode {
Exec, /// The code consists of a sequence of statements.
Eval, Module,
Single, /// The code consists of a sequence of interactive statement.
BlockExpr, Interactive,
/// The code consists of a single expression.
Expression,
} }
impl std::str::FromStr for Mode { impl std::str::FromStr for Mode {
type Err = ModeParseError; type Err = ModeParseError;
// To support `builtins.compile()` `mode` argument
fn from_str(s: &str) -> Result<Self, ModeParseError> { fn from_str(s: &str) -> Result<Self, ModeParseError> {
match s { match s {
"exec" => Ok(Mode::Exec), "exec" | "single" => Ok(Mode::Module),
"eval" => Ok(Mode::Eval), "eval" => Ok(Mode::Expression),
"single" => Ok(Mode::Single), _ => Err(ModeParseError),
_ => Err(ModeParseError(())),
} }
} }
} }
/// Returned when a given mode is not valid.
#[derive(Debug)] #[derive(Debug)]
pub struct ModeParseError(()); pub struct ModeParseError;
impl std::fmt::Display for ModeParseError { impl std::fmt::Display for ModeParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {

126
core/src/source_code.rs Normal file
View file

@ -0,0 +1,126 @@
// re-export our public interface
pub use ruff_python_ast::source_code::*;
pub type LineNumber = ruff_python_ast::source_code::OneIndexed;
/// A span of source locations with a mandatory start and an optional end.
#[derive(Debug)]
pub struct SourceRange {
    /// Location at which the range begins.
    pub start: SourceLocation,
    /// Location at which the range ends, when one is recorded.
    pub end: Option<SourceLocation>,
}

impl SourceRange {
    /// Builds a range whose end location is present.
    pub fn new(start: SourceLocation, end: SourceLocation) -> Self {
        let end = Some(end);
        Self { start, end }
    }

    /// Returns the end location.
    ///
    /// # Panics
    ///
    /// Panics if `end` is `None`.
    pub fn unwrap_end(&self) -> SourceLocation {
        let end = self.end;
        end.unwrap()
    }
}
/// Converts a standard `start..end` range of locations into a `SourceRange`
/// whose end is always present.
impl From<std::ops::Range<SourceLocation>> for SourceRange {
    fn from(value: std::ops::Range<SourceLocation>) -> Self {
        let std::ops::Range { start, end } = value;
        Self {
            start,
            end: Some(end),
        }
    }
}
/// Converts source code byte-offset to Python convention line and column numbers.
pub struct SourceLocator<'a> {
    /// The source text that offsets refer to.
    pub source: &'a str,
    /// Line index built once from `source` in [`SourceLocator::new`].
    index: LineIndex,
}

impl<'a> SourceLocator<'a> {
    /// Creates a locator for `source`, building its line index up front.
    #[inline]
    pub fn new(source: &'a str) -> Self {
        Self {
            source,
            index: LineIndex::from_source_text(source),
        }
    }

    /// Returns a `SourceCode` view over the stored text and line index.
    pub fn to_source_code(&self) -> SourceCode {
        let text = self.source;
        let line_index = &self.index;
        SourceCode::new(text, line_index)
    }

    /// Resolves `offset` to a `SourceLocation`.
    ///
    /// The offset is passed through its `u32` value so it can be converted
    /// into the offset type that `SourceCode::source_location` accepts.
    // NOTE(review): nothing in this body mutates `self`; `&mut self` is kept
    // so the public signature stays unchanged.
    pub fn locate(&mut self, offset: crate::text_size::TextSize) -> SourceLocation {
        let raw_offset = offset.to_u32();
        let code = self.to_source_code();
        code.source_location(raw_offset.into())
    }

    /// Turns an offset-based `BaseError` into a `LocatedError`, resolving the
    /// offset to a location and converting the inner error with `Into`.
    pub fn locate_error<T, U>(&mut self, base: crate::error::BaseError<T>) -> LocatedError<U>
    where
        T: Into<U>,
    {
        let location = Some(self.locate(base.offset));
        let error: U = base.error.into();
        LocatedError {
            error,
            location,
            source_path: base.source_path,
        }
    }
}
/// An error of type `T` paired with an optional source location and the path
/// of the source it was reported for.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedError<T> {
    /// The underlying error value.
    pub error: T,
    /// Where the error occurred, when known.
    pub location: Option<SourceLocation>,
    /// Path of the source the error belongs to.
    pub source_path: String,
}

impl<T> LocatedError<T> {
    /// Consumes the wrapper and returns the inner error.
    pub fn error(self) -> T {
        let Self { error, .. } = self;
        error
    }

    /// Builds a `LocatedError<T>` from one holding a convertible error type,
    /// keeping location and source path as they are.
    pub fn from<U>(obj: LocatedError<U>) -> Self
    where
        U: Into<T>,
    {
        let LocatedError {
            error,
            location,
            source_path,
        } = obj;
        Self {
            error: error.into(),
            location,
            source_path,
        }
    }

    /// Converts the inner error into `U`; the counterpart of the inherent
    /// [`LocatedError::from`].
    pub fn into<U>(self) -> LocatedError<U>
    where
        T: Into<U>,
    {
        LocatedError::<U>::from(self)
    }

    /// Returns `(row, column)` for the stored location, or `(0, 0)` when no
    /// location is recorded.
    pub fn python_location(&self) -> (usize, usize) {
        match self.location {
            Some(found) => (found.row.to_usize(), found.column.to_usize()),
            None => (0, 0),
        }
    }
}
/// Formats as `<error> at row <row> col <column>`; a missing location is
/// shown as row 0, column 0.
impl<T> std::fmt::Display for LocatedError<T>
where
    T: std::fmt::Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // `python_location` already provides the `(0, 0)` fallback.
        let (row, column) = self.python_location();
        write!(f, "{} at row {} col {}", self.error, row, column)
    }
}
/// Exposes the wrapped error as the `source` of this error.
impl<T> std::error::Error for LocatedError<T>
where
    T: std::error::Error + 'static,
{
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = &self.error;
        Some(cause)
    }
}

View file

@ -10,7 +10,7 @@ edition = "2021"
[features] [features]
default = [] default = []
serde = ["dep:serde", "rustpython-compiler-core/serde"] serde = ["dep:serde", "rustpython-parser-core/serde"]
[build-dependencies] [build-dependencies]
anyhow = { workspace = true } anyhow = { workspace = true }
@ -19,10 +19,9 @@ phf_codegen = "0.11.1"
tiny-keccak = { version = "2", features = ["sha3"] } tiny-keccak = { version = "2", features = ["sha3"] }
[dependencies] [dependencies]
rustpython-ast = { path = "../ast", version = "0.2.0" } rustpython-ast = { workspace = true }
rustpython-compiler-core = { path = "../core", version = "0.2.0" } rustpython-parser-core = { workspace = true }
ahash = { workspace = true }
itertools = { workspace = true } itertools = { workspace = true }
log = { workspace = true } log = { workspace = true }
num-bigint = { workspace = true } num-bigint = { workspace = true }

View file

@ -28,11 +28,11 @@
//! //!
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
use crate::{ use crate::{
mode::Mode,
soft_keywords::SoftKeywordTransformer, soft_keywords::SoftKeywordTransformer,
string::FStringErrorType, string::FStringErrorType,
text_size::{TextLen, TextRange, TextSize}, text_size::{TextLen, TextRange, TextSize},
token::{StringKind, Tok}, token::{StringKind, Tok},
Mode,
}; };
use log::trace; use log::trace;
use num_bigint::BigInt; use num_bigint::BigInt;

View file

@ -113,20 +113,17 @@
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")] #![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
pub use rustpython_ast as ast; pub use rustpython_ast as ast;
pub use rustpython_compiler_core::text_size; pub use rustpython_parser_core::{source_code, text_size, Mode};
pub use rustpython_compiler_core::ConversionFlag;
mod function; mod function;
// Skip flattening lexer to distinguish from full parser // Skip flattening lexer to distinguish from full parser
mod context; mod context;
pub mod lexer; pub mod lexer;
mod mode;
mod parser; mod parser;
mod soft_keywords; mod soft_keywords;
mod string; mod string;
mod token; mod token;
pub use mode::Mode;
pub use parser::{ pub use parser::{
parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens, parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
ParseError, ParseErrorType, ParseError, ParseErrorType,

View file

@ -1,55 +0,0 @@
//! Control in the different modes by which a source file can be parsed.
use crate::token::Tok;
/// The mode argument specifies in what way code must be parsed.
///
/// Besides being `Copy`, the enum derives `Debug`, `PartialEq`, `Eq` and
/// `Hash` so a mode can be logged, compared in tests and used as a map key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Mode {
    /// The code consists of a sequence of statements.
    Module,
    /// The code consists of a sequence of interactive statements.
    Interactive,
    /// The code consists of a single expression.
    Expression,
}
impl Mode {
pub(crate) fn to_marker(self) -> Tok {
match self {
Self::Module => Tok::StartModule,
Self::Interactive => Tok::StartInteractive,
Self::Expression => Tok::StartExpression,
}
}
}
impl From<rustpython_compiler_core::Mode> for Mode {
fn from(mode: rustpython_compiler_core::Mode) -> Self {
use rustpython_compiler_core::Mode as CompileMode;
match mode {
CompileMode::Exec => Self::Module,
CompileMode::Eval => Self::Expression,
CompileMode::Single | CompileMode::BlockExpr => Self::Interactive,
}
}
}
/// Parses a mode name: `"exec"` and `"single"` both select `Module`,
/// `"eval"` selects `Expression`, and anything else is rejected.
impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        let mode = match s {
            "eval" => Mode::Expression,
            "exec" | "single" => Mode::Module,
            _ => return Err(ModeParseError(())),
        };
        Ok(mode)
    }
}
/// Returned when a given mode is not valid.
///
/// The private unit field means the value can only be constructed inside the
/// defining module.
#[derive(Debug)]
pub struct ModeParseError(());

impl std::fmt::Display for ModeParseError {
    /// Writes the fixed message listing the accepted mode names.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.write_str(r#"mode must be "exec", "eval", or "single""#)
    }
}

View file

@ -15,10 +15,10 @@
use crate::{ use crate::{
ast, ast,
lexer::{self, LexResult, LexicalError, LexicalErrorType}, lexer::{self, LexResult, LexicalError, LexicalErrorType},
mode::Mode,
python, python,
text_size::TextSize, text_size::TextSize,
token::Tok, token::Tok,
Mode,
}; };
use itertools::Itertools; use itertools::Itertools;
use std::iter; use std::iter;
@ -187,7 +187,7 @@ pub fn parse_tokens(
mode: Mode, mode: Mode,
source_path: &str, source_path: &str,
) -> Result<ast::Mod, ParseError> { ) -> Result<ast::Mod, ParseError> {
let marker_token = (mode.to_marker(), Default::default()); let marker_token = (Tok::start_marker(mode), Default::default());
let lexer = iter::once(Ok(marker_token)) let lexer = iter::once(Ok(marker_token))
.chain(lxr) .chain(lxr)
.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)); .filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
@ -202,7 +202,7 @@ pub fn parse_tokens(
/// Represents represent errors that occur during parsing and are /// Represents represent errors that occur during parsing and are
/// returned by the `parse_*` functions. /// returned by the `parse_*` functions.
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>; pub type ParseError = rustpython_parser_core::BaseError<ParseErrorType>;
/// Represents the different types of errors that can occur during parsing. /// Represents the different types of errors that can occur during parsing.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]

View file

@ -10,6 +10,7 @@ use crate::{
context::set_context, context::set_context,
string::parse_strings, string::parse_strings,
token::{self, StringKind}, token::{self, StringKind},
text_size::TextSize,
}; };
use num_bigint::BigInt; use num_bigint::BigInt;
@ -254,7 +255,7 @@ ImportStatement: ast::Stmt = {
}, },
}; };
ImportFromLocation: (Option<usize>, Option<String>) = { ImportFromLocation: (Option<u32>, Option<String>) = {
<dots: ImportDots*> <name:DottedName> => { <dots: ImportDots*> <name:DottedName> => {
(Some(dots.iter().sum()), Some(name)) (Some(dots.iter().sum()), Some(name))
}, },
@ -263,7 +264,7 @@ ImportFromLocation: (Option<usize>, Option<String>) = {
}, },
}; };
ImportDots: usize = { ImportDots: u32 = {
"..." => 3, "..." => 3,
"." => 1, "." => 1,
}; };
@ -1721,7 +1722,7 @@ ArgumentList: ArgumentList = {
} }
}; };
FunctionArgument: (Option<(crate::text_size::TextSize, crate::text_size::TextSize, Option<String>)>, ast::Expr) = { FunctionArgument: (Option<(TextSize, TextSize, Option<String>)>, ast::Expr) = {
<location:@L> <e:NamedExpressionTest> <c:CompFor?> <end_location:@R> => { <location:@L> <e:NamedExpressionTest> <c:CompFor?> <end_location:@R> => {
let expr = match c { let expr = match c {
Some(c) => ast::Expr::new( Some(c) => ast::Expr::new(
@ -1775,7 +1776,7 @@ Identifier: String = <s:name> => s;
// Hook external lexer: // Hook external lexer:
extern { extern {
type Location = crate::text_size::TextSize; type Location = TextSize;
type Error = LexicalError; type Error = LexicalError;
enum token::Tok { enum token::Tok {

18926
parser/src/python.rs generated

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
use crate::{lexer::LexResult, mode::Mode, token::Tok}; use crate::{lexer::LexResult, token::Tok, Mode};
use itertools::{Itertools, MultiPeek}; use itertools::{Itertools, MultiPeek};
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match` /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`

View file

@ -4,13 +4,16 @@
// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701) // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
// we have to do the parsing here, manually. // we have to do the parsing here, manually.
use crate::{ use crate::{
ast::{self, Constant, ConversionFlag, Expr, ExprKind}, ast::{self, Constant, Expr, ExprKind},
lexer::{LexicalError, LexicalErrorType}, lexer::{LexicalError, LexicalErrorType},
parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType}, parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
token::{StringKind, Tok}, token::{StringKind, Tok},
}; };
use itertools::Itertools; use itertools::Itertools;
use rustpython_compiler_core::text_size::{TextLen, TextSize}; use rustpython_parser_core::{
text_size::{TextLen, TextSize},
ConversionFlag,
};
// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798 // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
const MAX_UNICODE_NAME: usize = 88; const MAX_UNICODE_NAME: usize = 88;

View file

@ -4,7 +4,7 @@
//! loosely based on the token definitions found in the [CPython source]. //! loosely based on the token definitions found in the [CPython source].
//! //!
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h //! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
use crate::text_size::TextSize; use crate::{text_size::TextSize, Mode};
use num_bigint::BigInt; use num_bigint::BigInt;
use std::fmt; use std::fmt;
@ -196,6 +196,16 @@ pub enum Tok {
StartExpression, StartExpression,
} }
impl Tok {
    /// Returns the start-marker token that corresponds to `mode`.
    pub fn start_marker(mode: Mode) -> Self {
        match mode {
            Mode::Expression => Self::StartExpression,
            Mode::Interactive => Self::StartInteractive,
            Mode::Module => Self::StartModule,
        }
    }
}
impl fmt::Display for Tok { impl fmt::Display for Tok {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use Tok::*; use Tok::*;
@ -404,10 +414,11 @@ impl StringKind {
/// Returns the number of characters in the prefix. /// Returns the number of characters in the prefix.
pub fn prefix_len(&self) -> TextSize { pub fn prefix_len(&self) -> TextSize {
use StringKind::*; use StringKind::*;
match self { let len = match self {
String => TextSize::from(0), String => 0,
RawString | FString | Unicode | Bytes => TextSize::from(1), RawString | FString | Unicode | Bytes => 1,
RawFString | RawBytes => TextSize::from(2), RawFString | RawBytes => 2,
} };
len.into()
} }
} }