From 62d88e0b8676fceea13dae5007749f60f41fded2 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Tue, 21 Feb 2023 19:18:42 -0500
Subject: [PATCH] Always wrap in SoftKeywordTransformer

---
 parser/src/lexer.rs         | 18 ++++++++++++------
 parser/src/lib.rs           | 10 ++++++----
 parser/src/parser.rs        | 19 +++++++++++--------
 parser/src/soft_keywords.rs | 24 ++++++++++++++----------
 4 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs
index 20e6b14..2ea7867 100644
--- a/parser/src/lexer.rs
+++ b/parser/src/lexer.rs
@@ -13,10 +13,11 @@
 //!
 //! ```
 //! use rustpython_parser::lexer::{make_tokenizer, Tok};
+//! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::token::StringKind;
 //!
 //! let source = "x = 'RustPython'";
-//! let tokens = make_tokenizer(source)
+//! let tokens = make_tokenizer(source, Mode::Module)
 //!     .map(|tok| tok.expect("Failed to lex"))
 //!     .collect::<Vec<_>>();
 //!
@@ -35,6 +36,8 @@
 pub use super::token::{StringKind, Tok};
 use crate::ast::Location;
 use crate::error::{LexicalError, LexicalErrorType};
+use crate::mode::Mode;
+use crate::soft_keywords::SoftKeywordTransformer;
 use num_bigint::BigInt;
 use num_traits::identities::Zero;
 use num_traits::Num;
@@ -197,27 +200,29 @@ pub type LexResult = Result<Spanned, LexicalError>;
 /// # Examples
 ///
 /// ```
+/// use rustpython_parser::mode::Mode;
 /// use rustpython_parser::lexer::{make_tokenizer};
 ///
 /// let source = "def hello(): return 'world'";
-/// let tokenizer = make_tokenizer(source);
+/// let tokenizer = make_tokenizer(source, Mode::Module);
 ///
 /// for token in tokenizer {
 ///     println!("{:?}", token);
 /// }
 /// ```
 #[inline]
-pub fn make_tokenizer(source: &str) -> impl Iterator<Item = LexResult> + '_ {
-    make_tokenizer_located(source, Location::default())
+pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
+    make_tokenizer_located(source, mode, Location::default())
 }
 
 /// Create a new tokenizer from a source string, starting at a given location.
 /// You probably want to use [`make_tokenizer`] instead.
 pub fn make_tokenizer_located(
     source: &str,
+    mode: Mode,
     start_location: Location,
 ) -> impl Iterator<Item = LexResult> + '_ {
-    Lexer::new(source.chars(), start_location)
+    SoftKeywordTransformer::new(Lexer::new(source.chars(), start_location), mode)
 }
 
 impl<T> Lexer<T>
 where
@@ -1210,6 +1215,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::{make_tokenizer, StringKind, Tok};
+    use crate::mode::Mode;
     use num_bigint::BigInt;
 
     const WINDOWS_EOL: &str = "\r\n";
@@ -1216,7 +1222,7 @@ mod tests {
     const MAC_EOL: &str = "\r";
     const UNIX_EOL: &str = "\n";
 
     pub fn lex_source(source: &str) -> Vec<Tok> {
-        let lexer = make_tokenizer(source);
+        let lexer = make_tokenizer(source, Mode::Module);
         lexer.map(|x| x.unwrap().1).collect()
     }
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index 9cc1dbc..bfb56d7 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -66,27 +66,29 @@
 //! For example, to get a stream of tokens from a given string, one could do this:
 //!
 //! ```
+//! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::lexer::make_tokenizer;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let mut tokens = make_tokenizer(python_source);
+//! let mut tokens = make_tokenizer(python_source, Mode::Module);
 //! assert!(tokens.all(|t| t.is_ok()));
 //! ```
 //!
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::parser::{parse_tokens, Mode};
 //! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::mode::Mode;
+//! use rustpython_parser::parser::parse_tokens;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let tokens = make_tokenizer(python_source);
+//! let tokens = make_tokenizer(python_source, Mode::Module);
 //! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
 //!
 //! assert!(ast.is_ok());
@@ -131,5 +133,5 @@ mod string;
 #[rustfmt::skip]
 mod python;
 mod context;
-pub mod soft_keywords;
+mod soft_keywords;
 pub mod token;
diff --git a/parser/src/parser.rs b/parser/src/parser.rs
index 65f9ea4..e0865b2 100644
--- a/parser/src/parser.rs
+++ b/parser/src/parser.rs
@@ -14,7 +14,6 @@
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;
-use crate::soft_keywords::SoftKeywordTransformer;
 use crate::{ast, error::ParseError, lexer, python};
 use ast::Location;
 use itertools::Itertools;
@@ -107,7 +106,8 @@ pub fn parse_expression_located(
 /// parsing:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse, Mode};
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse;
 ///
 /// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
@@ -116,7 +116,8 @@ pub fn parse_expression_located(
 /// Alternatively, we can parse a full Python program consisting of multiple lines:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse, Mode};
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse;
 ///
 /// let source = r#"
 /// class Greeter:
 ///
@@ -139,8 +140,9 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, P
 pub fn parse_located(
     source: &str,
     mode: Mode,
     source_path: &str,
     location: Location,
 ) -> Result<ast::Mod, ParseError> {
-    let lxr = lexer::make_tokenizer_located(source, location);
+    let lxr = lexer::make_tokenizer_located(source, mode, location);
     parse_tokens(lxr, mode, source_path)
 }
@@ -174,10 +176,11 @@ pub fn parse_located(
 /// them using the [`lexer::make_tokenizer`] function:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse_tokens, Mode};
 /// use rustpython_parser::lexer::make_tokenizer;
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse_tokens;
 ///
-/// let expr = parse_tokens(make_tokenizer("1 + 2"), Mode::Expression, "<embedded>");
+/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
 /// ```
 pub fn parse_tokens(
@@ -190,7 +193,7 @@ pub fn parse_tokens(
     let tokenizer = iter::once(Ok(marker_token))
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
     python::TopParser::new()
-        .parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
+        .parse(tokenizer.into_iter())
         .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
 }
diff --git a/parser/src/soft_keywords.rs b/parser/src/soft_keywords.rs
index a425625..a029ccd 100644
--- a/parser/src/soft_keywords.rs
+++ b/parser/src/soft_keywords.rs
@@ -19,8 +19,8 @@ pub struct SoftKeywordTransformer<I>
 where
     I: Iterator<Item = LexResult>,
 {
-    pub underlying: MultiPeek<I>,
-    pub start_of_line: bool,
+    underlying: MultiPeek<I>,
+    start_of_line: bool,
 }
 
 impl<I> SoftKeywordTransformer<I>
 where
@@ -84,14 +84,18 @@
         self.start_of_line = next.as_ref().map_or(false, |lex_result| {
             lex_result.as_ref().map_or(false, |(_, tok, _)| {
-                matches!(
-                    tok,
-                    Tok::StartModule
-                        | Tok::StartInteractive
-                        | Tok::Newline
-                        | Tok::Indent
-                        | Tok::Dedent
-                )
+                if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
+                    self.start_of_line
+                } else {
+                    matches!(
+                        tok,
+                        Tok::StartModule
+                            | Tok::StartInteractive
+                            | Tok::Newline
+                            | Tok::Indent
+                            | Tok::Dedent
+                    )
+                }
             })
         });
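
A minimal sketch of the call sites after this change, mirroring the doc examples
updated above: callers pass a Mode and never construct SoftKeywordTransformer
themselves, since make_tokenizer now applies it unconditionally. The
`match`/`case` source strings are assumptions chosen to exercise the
soft-keyword path; they are not taken from the patch itself.

    use rustpython_parser::lexer::make_tokenizer;
    use rustpython_parser::mode::Mode;
    use rustpython_parser::parser::parse_tokens;

    fn main() {
        // `match` is a soft keyword: in assignment position the transformer
        // must leave it as a plain identifier...
        let tokens = make_tokenizer("match = 1\n", Mode::Module);
        assert!(parse_tokens(tokens, Mode::Module, "<embedded>").is_ok());

        // ...while still recognizing it at the start of a match statement.
        let source = "match x:\n    case 1:\n        pass\n";
        let tokens = make_tokenizer(source, Mode::Module);
        assert!(parse_tokens(tokens, Mode::Module, "<embedded>").is_ok());
    }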