mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-19 11:05:45 +00:00
numerous refactoring
- Split parser core and compiler core. Fix #14
- AST int type to `u32`
- Updated asdl_rs.py and update_asdl.sh. Fix #6
- Use `ruff_python_ast::SourceLocation` for Python source location. Deleted our own Location.
- Renamed ast::Located to ast::Attributed to distinguish terms for TextSize and SourceLocation
- `ast::<Node>`s for TextSize-located AST. `ast::located::<Node>` for Python-source-located AST.
- Also strictly renamed `located` to refer only to Python-location-related interfaces.
- `SourceLocator` to convert locations.
- New `source-code` feature to disable Python locations when unnecessary.
- Also includes fully merging https://github.com/astral-sh/RustPython/pull/4
closes #9
This commit is contained in:
parent
09a6afdd04
commit
a3d9d8cb14
29 changed files with 9737 additions and 12000 deletions
|
@ -28,11 +28,11 @@
|
|||
//!
|
||||
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
|
||||
use crate::{
|
||||
mode::Mode,
|
||||
soft_keywords::SoftKeywordTransformer,
|
||||
string::FStringErrorType,
|
||||
text_size::{TextLen, TextRange, TextSize},
|
||||
token::{StringKind, Tok},
|
||||
Mode,
|
||||
};
|
||||
use log::trace;
|
||||
use num_bigint::BigInt;
|
||||
|
|
|
@ -113,20 +113,17 @@
|
|||
#![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
|
||||
|
||||
pub use rustpython_ast as ast;
|
||||
pub use rustpython_compiler_core::text_size;
|
||||
pub use rustpython_compiler_core::ConversionFlag;
|
||||
pub use rustpython_parser_core::{source_code, text_size, Mode};
|
||||
|
||||
mod function;
|
||||
// Skip flattening lexer to distinguish from full parser
|
||||
mod context;
|
||||
pub mod lexer;
|
||||
mod mode;
|
||||
mod parser;
|
||||
mod soft_keywords;
|
||||
mod string;
|
||||
mod token;
|
||||
|
||||
pub use mode::Mode;
|
||||
pub use parser::{
|
||||
parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
|
||||
ParseError, ParseErrorType,
|
||||
|
|
|
@ -1,55 +0,0 @@
|
|||
//! Control in the different modes by which a source file can be parsed.
|
||||
use crate::token::Tok;
|
||||
|
||||
/// The mode argument specifies in what way code must be parsed.
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum Mode {
|
||||
/// The code consists of a sequence of statements.
|
||||
Module,
|
||||
/// The code consists of a sequence of interactive statements.
|
||||
Interactive,
|
||||
/// The code consists of a single expression.
|
||||
Expression,
|
||||
}
|
||||
|
||||
impl Mode {
|
||||
pub(crate) fn to_marker(self) -> Tok {
|
||||
match self {
|
||||
Self::Module => Tok::StartModule,
|
||||
Self::Interactive => Tok::StartInteractive,
|
||||
Self::Expression => Tok::StartExpression,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rustpython_compiler_core::Mode> for Mode {
|
||||
fn from(mode: rustpython_compiler_core::Mode) -> Self {
|
||||
use rustpython_compiler_core::Mode as CompileMode;
|
||||
match mode {
|
||||
CompileMode::Exec => Self::Module,
|
||||
CompileMode::Eval => Self::Expression,
|
||||
CompileMode::Single | CompileMode::BlockExpr => Self::Interactive,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Mode {
|
||||
type Err = ModeParseError;
|
||||
fn from_str(s: &str) -> Result<Self, ModeParseError> {
|
||||
match s {
|
||||
"exec" | "single" => Ok(Mode::Module),
|
||||
"eval" => Ok(Mode::Expression),
|
||||
_ => Err(ModeParseError(())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returned when a given mode is not valid.
|
||||
#[derive(Debug)]
|
||||
pub struct ModeParseError(());
|
||||
|
||||
impl std::fmt::Display for ModeParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, r#"mode must be "exec", "eval", or "single""#)
|
||||
}
|
||||
}
|
|
@ -15,10 +15,10 @@
|
|||
use crate::{
|
||||
ast,
|
||||
lexer::{self, LexResult, LexicalError, LexicalErrorType},
|
||||
mode::Mode,
|
||||
python,
|
||||
text_size::TextSize,
|
||||
token::Tok,
|
||||
Mode,
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use std::iter;
|
||||
|
@ -187,7 +187,7 @@ pub fn parse_tokens(
|
|||
mode: Mode,
|
||||
source_path: &str,
|
||||
) -> Result<ast::Mod, ParseError> {
|
||||
let marker_token = (mode.to_marker(), Default::default());
|
||||
let marker_token = (Tok::start_marker(mode), Default::default());
|
||||
let lexer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
|
@ -202,7 +202,7 @@ pub fn parse_tokens(
|
|||
|
||||
/// Represents errors that occur during parsing and are
|
||||
/// returned by the `parse_*` functions.
|
||||
pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
|
||||
pub type ParseError = rustpython_parser_core::BaseError<ParseErrorType>;
|
||||
|
||||
/// Represents the different types of errors that can occur during parsing.
|
||||
#[derive(Debug, PartialEq)]
|
||||
|
|
|
@ -10,6 +10,7 @@ use crate::{
|
|||
context::set_context,
|
||||
string::parse_strings,
|
||||
token::{self, StringKind},
|
||||
text_size::TextSize,
|
||||
};
|
||||
use num_bigint::BigInt;
|
||||
|
||||
|
@ -254,7 +255,7 @@ ImportStatement: ast::Stmt = {
|
|||
},
|
||||
};
|
||||
|
||||
ImportFromLocation: (Option<usize>, Option<String>) = {
|
||||
ImportFromLocation: (Option<u32>, Option<String>) = {
|
||||
<dots: ImportDots*> <name:DottedName> => {
|
||||
(Some(dots.iter().sum()), Some(name))
|
||||
},
|
||||
|
@ -263,7 +264,7 @@ ImportFromLocation: (Option<usize>, Option<String>) = {
|
|||
},
|
||||
};
|
||||
|
||||
ImportDots: usize = {
|
||||
ImportDots: u32 = {
|
||||
"..." => 3,
|
||||
"." => 1,
|
||||
};
|
||||
|
@ -1721,7 +1722,7 @@ ArgumentList: ArgumentList = {
|
|||
}
|
||||
};
|
||||
|
||||
FunctionArgument: (Option<(crate::text_size::TextSize, crate::text_size::TextSize, Option<String>)>, ast::Expr) = {
|
||||
FunctionArgument: (Option<(TextSize, TextSize, Option<String>)>, ast::Expr) = {
|
||||
<location:@L> <e:NamedExpressionTest> <c:CompFor?> <end_location:@R> => {
|
||||
let expr = match c {
|
||||
Some(c) => ast::Expr::new(
|
||||
|
@ -1775,7 +1776,7 @@ Identifier: String = <s:name> => s;
|
|||
|
||||
// Hook external lexer:
|
||||
extern {
|
||||
type Location = crate::text_size::TextSize;
|
||||
type Location = TextSize;
|
||||
type Error = LexicalError;
|
||||
|
||||
enum token::Tok {
|
||||
|
|
18926
parser/src/python.rs
generated
18926
parser/src/python.rs
generated
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
|||
use crate::{lexer::LexResult, mode::Mode, token::Tok};
|
||||
use crate::{lexer::LexResult, token::Tok, Mode};
|
||||
use itertools::{Itertools, MultiPeek};
|
||||
|
||||
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
|
||||
|
|
|
@ -4,13 +4,16 @@
|
|||
// regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
|
||||
// we have to do the parsing here, manually.
|
||||
use crate::{
|
||||
ast::{self, Constant, ConversionFlag, Expr, ExprKind},
|
||||
ast::{self, Constant, Expr, ExprKind},
|
||||
lexer::{LexicalError, LexicalErrorType},
|
||||
parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
|
||||
token::{StringKind, Tok},
|
||||
};
|
||||
use itertools::Itertools;
|
||||
use rustpython_compiler_core::text_size::{TextLen, TextSize};
|
||||
use rustpython_parser_core::{
|
||||
text_size::{TextLen, TextSize},
|
||||
ConversionFlag,
|
||||
};
|
||||
|
||||
// unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
|
||||
const MAX_UNICODE_NAME: usize = 88;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
//! loosely based on the token definitions found in the [CPython source].
|
||||
//!
|
||||
//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
|
||||
use crate::text_size::TextSize;
|
||||
use crate::{text_size::TextSize, Mode};
|
||||
use num_bigint::BigInt;
|
||||
use std::fmt;
|
||||
|
||||
|
@ -196,6 +196,16 @@ pub enum Tok {
|
|||
StartExpression,
|
||||
}
|
||||
|
||||
impl Tok {
|
||||
pub fn start_marker(mode: Mode) -> Self {
|
||||
match mode {
|
||||
Mode::Module => Tok::StartModule,
|
||||
Mode::Interactive => Tok::StartInteractive,
|
||||
Mode::Expression => Tok::StartExpression,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Tok {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use Tok::*;
|
||||
|
@ -404,10 +414,11 @@ impl StringKind {
|
|||
/// Returns the number of characters in the prefix.
|
||||
pub fn prefix_len(&self) -> TextSize {
|
||||
use StringKind::*;
|
||||
match self {
|
||||
String => TextSize::from(0),
|
||||
RawString | FString | Unicode | Bytes => TextSize::from(1),
|
||||
RawFString | RawBytes => TextSize::from(2),
|
||||
}
|
||||
let len = match self {
|
||||
String => 0,
|
||||
RawString | FString | Unicode | Bytes => 1,
|
||||
RawFString | RawBytes => 2,
|
||||
};
|
||||
len.into()
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue