numerous refactorings

- Split parser core and compiler core. Fix #14
- Changed the AST int type to `u32`
- Updated asdl_rs.py and update_asdl.sh. Fix #6
- Use `ruff_python_ast::SourceLocation` for Python source locations; deleted our own `Location` type
- Renamed `ast::Located` to `ast::Attributed` to keep the `TextSize` and `SourceLocation` terms distinct
- `ast::<Node>` is the `TextSize`-located AST; `ast::located::<Node>` is the Python-source-located AST
- `located` now strictly refers only to Python-location-related interfaces
- Added `SourceLocator` to convert between the two kinds of locations (see the sketch after this list)
- New `source-code` feature to disable Python locations when they are unnecessary
- Also fully merges https://github.com/astral-sh/RustPython/pull/4. Closes #9
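
A minimal standalone sketch of the offset-to-location conversion the new `SourceLocator` is responsible for. The struct shape and the `locate` method below are illustrative assumptions, not the crate's actual API; only the idea of converting `TextSize` byte offsets into row/column source locations comes from the commit.

// Illustrative stand-in for the locator: precompute line starts once,
// then binary-search them to turn a byte offset into (row, column).
struct SourceLocator {
    line_starts: Vec<u32>, // byte offset at which each line begins
}

impl SourceLocator {
    fn new(source: &str) -> Self {
        let mut line_starts = vec![0u32];
        for (i, b) in source.bytes().enumerate() {
            if b == b'\n' {
                line_starts.push(i as u32 + 1);
            }
        }
        Self { line_starts }
    }

    /// Returns a (1-based row, 0-based byte column) pair for `offset`.
    fn locate(&self, offset: u32) -> (usize, u32) {
        let row = self.line_starts.partition_point(|&start| start <= offset);
        (row, offset - self.line_starts[row - 1])
    }
}

fn main() {
    let locator = SourceLocator::new("x = 1\ny = 2\n");
    assert_eq!(locator.locate(6), (2, 0)); // `y` starts line 2, column 0
}
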
Jeong YunWon 2023-05-10 02:36:52 +09:00
parent 09a6afdd04
commit a3d9d8cb14
29 changed files with 9737 additions and 12000 deletions


@@ -28,11 +28,11 @@
 //!
 //! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
 use crate::{
-    mode::Mode,
     soft_keywords::SoftKeywordTransformer,
     string::FStringErrorType,
     text_size::{TextLen, TextRange, TextSize},
     token::{StringKind, Tok},
+    Mode,
 };
 use log::trace;
 use num_bigint::BigInt;


@@ -113,20 +113,17 @@
 #![doc(html_root_url = "https://docs.rs/rustpython-parser/")]
 
 pub use rustpython_ast as ast;
-pub use rustpython_compiler_core::text_size;
-pub use rustpython_compiler_core::ConversionFlag;
+pub use rustpython_parser_core::{source_code, text_size, Mode};
 
 mod function;
 // Skip flattening lexer to distinguish from full parser
 mod context;
 pub mod lexer;
-mod mode;
 mod parser;
 mod soft_keywords;
 mod string;
 mod token;
 
-pub use mode::Mode;
 pub use parser::{
     parse, parse_expression, parse_expression_located, parse_located, parse_program, parse_tokens,
     ParseError, ParseErrorType,


@@ -1,55 +0,0 @@
-//! Control in the different modes by which a source file can be parsed.
-use crate::token::Tok;
-
-/// The mode argument specifies in what way code must be parsed.
-#[derive(Clone, Copy)]
-pub enum Mode {
-    /// The code consists of a sequence of statements.
-    Module,
-    /// The code consists of a sequence of interactive statement.
-    Interactive,
-    /// The code consists of a single expression.
-    Expression,
-}
-
-impl Mode {
-    pub(crate) fn to_marker(self) -> Tok {
-        match self {
-            Self::Module => Tok::StartModule,
-            Self::Interactive => Tok::StartInteractive,
-            Self::Expression => Tok::StartExpression,
-        }
-    }
-}
-
-impl From<rustpython_compiler_core::Mode> for Mode {
-    fn from(mode: rustpython_compiler_core::Mode) -> Self {
-        use rustpython_compiler_core::Mode as CompileMode;
-        match mode {
-            CompileMode::Exec => Self::Module,
-            CompileMode::Eval => Self::Expression,
-            CompileMode::Single | CompileMode::BlockExpr => Self::Interactive,
-        }
-    }
-}
-
-impl std::str::FromStr for Mode {
-    type Err = ModeParseError;
-    fn from_str(s: &str) -> Result<Self, ModeParseError> {
-        match s {
-            "exec" | "single" => Ok(Mode::Module),
-            "eval" => Ok(Mode::Expression),
-            _ => Err(ModeParseError(())),
-        }
-    }
-}
-
-/// Returned when a given mode is not valid.
-#[derive(Debug)]
-pub struct ModeParseError(());
-
-impl std::fmt::Display for ModeParseError {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, r#"mode must be "exec", "eval", or "single""#)
-    }
-}
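
The deleted `Mode::to_marker` reappears inverted as `Tok::start_marker` in token.rs below, which is what lets `Mode` move into `rustpython-parser-core` without dragging the token type along. A minimal sketch of that inversion, with illustrative module names standing in for the real crates:

mod parser_core {
    // `Mode` now lives in a core crate that knows nothing about tokens.
    #[derive(Clone, Copy)]
    pub enum Mode { Module, Interactive, Expression }
}

mod token {
    use super::parser_core::Mode;

    pub enum Tok { StartModule, StartInteractive, StartExpression }

    impl Tok {
        // The marker constructor lives with `Tok`, mirroring the new
        // `Tok::start_marker` added in token.rs; the dependency now points
        // from the token type to the core crate, not the other way around.
        pub fn start_marker(mode: Mode) -> Self {
            match mode {
                Mode::Module => Tok::StartModule,
                Mode::Interactive => Tok::StartInteractive,
                Mode::Expression => Tok::StartExpression,
            }
        }
    }
}

fn main() {
    let tok = token::Tok::start_marker(parser_core::Mode::Module);
    assert!(matches!(tok, token::Tok::StartModule));
}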


@@ -15,10 +15,10 @@
 use crate::{
     ast,
     lexer::{self, LexResult, LexicalError, LexicalErrorType},
-    mode::Mode,
     python,
     text_size::TextSize,
     token::Tok,
+    Mode,
 };
 use itertools::Itertools;
 use std::iter;
@@ -187,7 +187,7 @@ pub fn parse_tokens(
     mode: Mode,
     source_path: &str,
 ) -> Result<ast::Mod, ParseError> {
-    let marker_token = (mode.to_marker(), Default::default());
+    let marker_token = (Tok::start_marker(mode), Default::default());
     let lexer = iter::once(Ok(marker_token))
         .chain(lxr)
         .filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
@@ -202,7 +202,7 @@ pub fn parse_tokens(
 
 /// Represents errors that occur during parsing and are
 /// returned by the `parse_*` functions.
-pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>;
+pub type ParseError = rustpython_parser_core::BaseError<ParseErrorType>;
 
 /// Represents the different types of errors that can occur during parsing.
 #[derive(Debug, PartialEq)]


@@ -10,6 +10,7 @@ use crate::{
     context::set_context,
     string::parse_strings,
     token::{self, StringKind},
+    text_size::TextSize,
 };
 
 use num_bigint::BigInt;
@@ -254,7 +255,7 @@ ImportStatement: ast::Stmt = {
     },
 };
 
-ImportFromLocation: (Option<usize>, Option<String>) = {
+ImportFromLocation: (Option<u32>, Option<String>) = {
     <dots: ImportDots*> <name:DottedName> => {
         (Some(dots.iter().sum()), Some(name))
     },
@@ -263,7 +264,7 @@ ImportFromLocation: (Option<usize>, Option<String>) = {
     },
 };
 
-ImportDots: usize = {
+ImportDots: u32 = {
     "..." => 3,
     "." => 1,
 };
@@ -1721,7 +1722,7 @@ ArgumentList: ArgumentList = {
     }
 };
 
-FunctionArgument: (Option<(crate::text_size::TextSize, crate::text_size::TextSize, Option<String>)>, ast::Expr) = {
+FunctionArgument: (Option<(TextSize, TextSize, Option<String>)>, ast::Expr) = {
     <location:@L> <e:NamedExpressionTest> <c:CompFor?> <end_location:@R> => {
         let expr = match c {
             Some(c) => ast::Expr::new(
@@ -1775,7 +1776,7 @@ Identifier: String = <s:name> => s;
 
 // Hook external lexer:
 extern {
-    type Location = crate::text_size::TextSize;
+    type Location = TextSize;
     type Error = LexicalError;
 
     enum token::Tok {

parser/src/python.rs (generated): 18926 changed lines

File diff suppressed because it is too large.


@@ -1,4 +1,4 @@
-use crate::{lexer::LexResult, mode::Mode, token::Tok};
+use crate::{lexer::LexResult, token::Tok, Mode};
 use itertools::{Itertools, MultiPeek};
 
 /// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`


@@ -4,13 +4,16 @@
 // regular strings. Since the parser has no definition of f-string formats (Pending PEP 701)
 // we have to do the parsing here, manually.
 use crate::{
-    ast::{self, Constant, ConversionFlag, Expr, ExprKind},
+    ast::{self, Constant, Expr, ExprKind},
     lexer::{LexicalError, LexicalErrorType},
     parser::{parse_expression_located, LalrpopError, ParseError, ParseErrorType},
     token::{StringKind, Tok},
 };
 use itertools::Itertools;
-use rustpython_compiler_core::text_size::{TextLen, TextSize};
+use rustpython_parser_core::{
+    text_size::{TextLen, TextSize},
+    ConversionFlag,
+};
 
 // unicode_name2 does not expose `MAX_NAME_LENGTH`, so we replicate that constant here, fix #3798
 const MAX_UNICODE_NAME: usize = 88;


@@ -4,7 +4,7 @@
 //! loosely based on the token definitions found in the [CPython source].
 //!
 //! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h
 
-use crate::text_size::TextSize;
+use crate::{text_size::TextSize, Mode};
 use num_bigint::BigInt;
 use std::fmt;
@@ -196,6 +196,16 @@ pub enum Tok {
     StartExpression,
 }
 
+impl Tok {
+    pub fn start_marker(mode: Mode) -> Self {
+        match mode {
+            Mode::Module => Tok::StartModule,
+            Mode::Interactive => Tok::StartInteractive,
+            Mode::Expression => Tok::StartExpression,
+        }
+    }
+}
+
 impl fmt::Display for Tok {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         use Tok::*;
@@ -404,10 +414,11 @@ impl StringKind {
     /// Returns the number of characters in the prefix.
     pub fn prefix_len(&self) -> TextSize {
         use StringKind::*;
-        match self {
-            String => TextSize::from(0),
-            RawString | FString | Unicode | Bytes => TextSize::from(1),
-            RawFString | RawBytes => TextSize::from(2),
-        }
+        let len = match self {
+            String => 0,
+            RawString | FString | Unicode | Bytes => 1,
+            RawFString | RawBytes => 2,
+        };
+        len.into()
     }
 }
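
The `prefix_len` cleanup works because the `text-size` crate's `TextSize` implements `From<u32>`: the match stays in plain integers and converts once at the end, with the `-> TextSize` return type driving the `.into()` inference. A standalone analogue with a stand-in `TextSize` type (the prefix strings below are illustrative, not the parser's actual `StringKind` variants):

#[derive(Debug, PartialEq)]
struct TextSize(u32);

impl From<u32> for TextSize {
    fn from(v: u32) -> Self {
        TextSize(v)
    }
}

// Same shape as the refactored `prefix_len`: compute a raw `u32`, then let
// the return type pick the `From<u32>` impl via `.into()` at a single point.
fn prefix_len(prefix: &str) -> TextSize {
    let len: u32 = match prefix {
        "" => 0,
        "r" | "f" | "u" | "b" => 1,
        "rf" | "rb" => 2,
        other => panic!("unknown string prefix: {other}"),
    };
    len.into()
}

fn main() {
    assert_eq!(prefix_len("rf"), TextSize(2));
}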