numerous refactorings

- Split parser core and compiler core. Fix #14
- Changed the AST int type to `u32`
- Updated asdl_rs.py and update_asdl.sh. Fix #6
- Use `ruff_python_ast::SourceLocation` for Python source locations; deleted our own `Location` type
- Renamed `ast::Located` to `ast::Attributed` to keep the `TextSize` and `SourceLocation` terms distinct
- `ast::<Node>` is the `TextSize`-located AST; `ast::located::<Node>` is the Python-source-located AST
- `located` now strictly refers only to Python-location-related interfaces
- Added `SourceLocator` to convert between the two kinds of locations (see the sketch after this list)
- New `source-code` feature to disable Python locations when they are unnecessary
- Also fully merges https://github.com/astral-sh/RustPython/pull/4. Closes #9
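
A minimal standalone sketch of the offset-to-location conversion the new `SourceLocator` is responsible for. The struct shape and the `locate` method below are illustrative assumptions, not the crate's actual API; only the idea of converting `TextSize` byte offsets into row/column source locations comes from the commit.

// Illustrative stand-in for the locator: precompute line starts once,
// then binary-search them to turn a byte offset into (row, column).
struct SourceLocator {
    line_starts: Vec<u32>, // byte offset at which each line begins
}

impl SourceLocator {
    fn new(source: &str) -> Self {
        let mut line_starts = vec![0u32];
        for (i, b) in source.bytes().enumerate() {
            if b == b'\n' {
                line_starts.push(i as u32 + 1);
            }
        }
        Self { line_starts }
    }

    /// Returns a (1-based row, 0-based byte column) pair for `offset`.
    fn locate(&self, offset: u32) -> (usize, u32) {
        let row = self.line_starts.partition_point(|&start| start <= offset);
        (row, offset - self.line_starts[row - 1])
    }
}

fn main() {
    let locator = SourceLocator::new("x = 1\ny = 2\n");
    assert_eq!(locator.locate(6), (2, 0)); // `y` starts line 2, column 0
}
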
Jeong YunWon 2023-05-10 02:36:52 +09:00
parent 09a6afdd04
commit a3d9d8cb14
29 changed files with 9737 additions and 12000 deletions


@@ -28,11 +28,11 @@
 //!
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
 use crate::{
-    mode::Mode,
     soft_keywords::SoftKeywordTransformer,
     string::FStringErrorType,
     text_size::{TextLen, TextRange, TextSize},
     token::{StringKind, Tok},
+    Mode,
 };
 use log::trace;
 use num_bigint::BigInt;


@@ -113,20 +113,17 @@
 #![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
 
 pub use rustpython_ast as ast;
-pub use rustpython_compiler_core::text_size;
-pub use rustpython_compiler_core::ConversionFlag;
+pub use rustpython_parser_core::{source_code, text_size, Mode};
 
 mod function;
 // Skip flattening lexer to distinguish from full parser
 mod context;
 pub mod lexer;
-mod mode;
 mod parser;
 mod soft_keywords;
 mod string;
 mod token;
 
-pub use mode::Mode;
 pub use parser::{
     parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
     ParseError, ParseErrorType,


@@ -1,55 +0,0 @@
-//! Control in the different modes by which a source file can be parsed.
-use crate::token::Tok;
-
-/// The mode argument specifies in what way code must be parsed.
-#[derive(Clone, Copy)]
-pub enum Mode {
-    /// The code consists of a sequence of statements.
-    Module,
-    /// The code consists of a sequence of interactive statement.
-    Interactive,
-    /// The code consists of a single expression.
-    Expression,
-}
-
-impl Mode {
-    pub(crate) fn to_marker(self) -> Tok {
-        match self {
-            Self::Module => Tok::StartModule,
-            Self::Interactive => Tok::StartInteractive,
-            Self::Expression => Tok::StartExpression,
-        }
-    }
-}
-
-impl From<rustpython_compiler_core::Mode> for Mode {
-    fn from(mode: rustpython_compiler_core::Mode) -> Self {
-        use rustpython_compiler_core::Mode as CompileMode;
-        match mode {
-            CompileMode::Exec => Self::Module,
-            CompileMode::Eval => Self::Expression,
-            CompileMode::Single | CompileMode::BlockExpr => Self::Interactive,
-        }
-    }
-}
-
-impl std::str::FromStr for Mode {
-    type Err = ModeParseError;
-    fn from_str(s: &str) -> Result<Self, ModeParseError> {
-        match s {
-            "exec" | "single" => Ok(Mode::Module),
-            "eval" => Ok(Mode::Expression),
-            _ => Err(ModeParseError(())),
-        }
-    }
-}
-
-/// Returned when a given mode is not valid.
-#[derive(Debug)]
-pub struct ModeParseError(());
-
-impl std::fmt::Display for ModeParseError {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, r#"mode must be "exec", "eval", or "single""#)
-    }
-}
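
The deleted `Mode::to_marker` reappears inverted as `Tok::start_marker` in token.rs below, which is what lets `Mode` move into `rustpython-parser-core` without dragging the token type along. A minimal sketch of that inversion, with illustrative module names standing in for the real crates:

mod parser_core {
    // `Mode` now lives in a core crate that knows nothing about tokens.
    #[derive(Clone, Copy)]
    pub enum Mode { Module, Interactive, Expression }
}

mod token {
    use super::parser_core::Mode;

    pub enum Tok { StartModule, StartInteractive, StartExpression }

    impl Tok {
        // The marker constructor lives with `Tok`, mirroring the new
        // `Tok::start_marker` added in token.rs; the dependency now points
        // from the token type to the core crate, not the other way around.
        pub fn start_marker(mode: Mode) -> Self {
            match mode {
                Mode::Module => Tok::StartModule,
                Mode::Interactive => Tok::StartInteractive,
                Mode::Expression => Tok::StartExpression,
            }
        }
    }
}

fn main() {
    let tok = token::Tok::start_marker(parser_core::Mode::Module);
    assert!(matches!(tok, token::Tok::StartModule));
}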


@@ -15,10 +15,10 @@
 use crate::{
     ast,
     lexer::{self, LexResult, LexicalError, LexicalErrorType},
-    mode::Mode,
     python,
     text_size::TextSize,
     token::Tok,
+    Mode,
 };
 use itertools::Itertools;
 use std::iter;
@@ -187,7 +187,7 @@ pub fn parse_tokens(
     mode: Mode,
     source_path: &str,
 ) -> Result<ast::Mod, ParseError> {
-    let marker_token = (mode.to_marker(), Default::default());
+    let marker_token = (Tok::start_marker(mode), Default::default());
     let lexer = iter::once(Ok(marker_token))
         .chain(lxr)
         .filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
@@ -202,7 +202,7 @@ pub fn parse_tokens(
 
 /// Represents errors that occur during parsing and are
 /// returned by the `parse_*` functions.
-pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
+pub type ParseError = rustpython_parser_core::BaseError<ParseErrorType>;
 
 /// Represents the different types of errors that can occur during parsing.
 #[derive(Debug, PartialEq)]


@@ -10,6 +10,7 @@ use crate::{
     context::set_context,
     string::parse_strings,
     token::{self, StringKind},
+    text_size::TextSize,
 };
 
 use num_bigint::BigInt;
@@ -254,7 +255,7 @@ ImportStatement: ast::Stmt = {
     },
 };
 
-ImportFromLocation: (Option<usize>, Option<String>) = {
+ImportFromLocation: (Option<u32>, Option<String>) = {
     <dots: ImportDots*> <name:DottedName> => {
         (Some(dots.iter().sum()), Some(name))
     },
@@ -263,7 +264,7 @@ ImportFromLocation: (Option<usize>, Option<String>) = {
     },
 };
 
-ImportDots: usize = {
+ImportDots: u32 = {
     "..." => 3,
     "." => 1,
 };
@@ -1721,7 +1722,7 @@ ArgumentList: ArgumentList = {
     }
 };
 
-FunctionArgument: (Option<(crate::text_size::TextSize, crate::text_size::TextSize, Option<String>)>, ast::Expr) = {
+FunctionArgument: (Option<(TextSize, TextSize, Option<String>)>, ast::Expr) = {
     <location:@L> <e:NamedExpressionTest> <c:CompFor?> <end_location:@R> => {
         let expr = match c {
             Some(c) => ast::Expr::new(
@@ -1775,7 +1776,7 @@ Identifier: String = <s:name> => s;
 
 // Hook external lexer:
 extern {
-    type Location = crate::text_size::TextSize;
+    type Location = TextSize;
     type Error = LexicalError;
 
     enum token::Tok {

parser/src/python.rs (generated): 18926 changed lines

File diff suppressed because it is too large.


@@ -1,4 +1,4 @@
-use crate::{lexer::LexResult, mode::Mode, token::Tok};
+use crate::{lexer::LexResult, token::Tok, Mode};
 use itertools::{Itertools, MultiPeek};
 
 /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`


@@ -4,13 +4,16 @@
 // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
 // we have to do the parsing here, manually.
 use crate::{
-    ast::{self, Constant, ConversionFlag, Expr, ExprKind},
+    ast::{self, Constant, Expr, ExprKind},
     lexer::{LexicalError, LexicalErrorType},
     parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
     token::{StringKind, Tok},
 };
 use itertools::Itertools;
-use rustpython_compiler_core::text_size::{TextLen, TextSize};
+use rustpython_parser_core::{
+    text_size::{TextLen, TextSize},
+    ConversionFlag,
+};
 
 // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
 const MAX_UNICODE_NAME: usize = 88;


@@ -4,7 +4,7 @@
 //! loosely based on the token definitions found in the [CPython source].
 //!
 //! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
 
-use crate::text_size::TextSize;
+use crate::{text_size::TextSize, Mode};
 use num_bigint::BigInt;
 use std::fmt;
@@ -196,6 +196,16 @@ pub enum Tok {
     StartExpression,
 }
 
+impl Tok {
+    pub fn start_marker(mode: Mode) -> Self {
+        match mode {
+            Mode::Module => Tok::StartModule,
+            Mode::Interactive => Tok::StartInteractive,
+            Mode::Expression => Tok::StartExpression,
+        }
+    }
+}
+
 impl fmt::Display for Tok {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         use Tok::*;
@@ -404,10 +414,11 @@ impl StringKind {
     /// Returns the number of characters in the prefix.
     pub fn prefix_len(&self) -> TextSize {
         use StringKind::*;
-        match self {
-            String => TextSize::from(0),
-            RawString | FString | Unicode | Bytes => TextSize::from(1),
-            RawFString | RawBytes => TextSize::from(2),
-        }
+        let len = match self {
+            String => 0,
+            RawString | FString | Unicode | Bytes => 1,
+            RawFString | RawBytes => 2,
+        };
+        len.into()
     }
 }
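
The `prefix_len` cleanup works because the `text-size` crate's `TextSize` implements `From<u32>`: the match stays in plain integers and converts once at the end, with the `-> TextSize` return type driving the `.into()` inference. A standalone analogue with a stand-in `TextSize` type (the prefix strings below are illustrative, not the parser's actual `StringKind` variants):

#[derive(Debug, PartialEq)]
struct TextSize(u32);

impl From<u32> for TextSize {
    fn from(v: u32) -> Self {
        TextSize(v)
    }
}

// Same shape as the refactored `prefix_len`: compute a raw `u32`, then let
// the return type pick the `From<u32>` impl via `.into()` at a single point.
fn prefix_len(prefix: &str) -> TextSize {
    let len: u32 = match prefix {
        "" => 0,
        "r" | "f" | "u" | "b" => 1,
        "rf" | "rb" => 2,
        other => panic!("unknown string prefix: {other}"),
    };
    len.into()
}

fn main() {
    assert_eq!(prefix_len("rf"), TextSize(2));
}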