Always wrap in SoftKeywordTransformer

Charlie Marsh 2023-02-21 19:18:42 -05:00
parent 82b91fe9aa
commit 62d88e0b86
4 changed files with 43 additions and 28 deletions
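In short: the lexer now applies SoftKeywordTransformer itself, so make_tokenizer and make_tokenizer_located take a Mode and the parser no longer wraps the token stream. A minimal sketch of the resulting call flow, mirroring the doc examples updated below (the match/case source string is illustrative, not from this commit):

use rustpython_parser::lexer::make_tokenizer;
use rustpython_parser::mode::Mode;
use rustpython_parser::parser::parse_tokens;

fn main() {
    // `make_tokenizer` now takes a `Mode` and returns a token stream that has
    // already been run through `SoftKeywordTransformer`, so `match`/`case` are
    // treated as keywords only where the grammar allows them.
    let source = "match x:\n    case _:\n        pass\n";
    let tokens = make_tokenizer(source, Mode::Module);
    let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
    assert!(ast.is_ok());
}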

lexer.rs

@@ -13,10 +13,11 @@
 //!
 //! ```
 //! use rustpython_parser::lexer::{make_tokenizer, Tok};
+//! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::token::StringKind;
 //!
 //! let source = "x = 'RustPython'";
-//! let tokens = make_tokenizer(source)
+//! let tokens = make_tokenizer(source, Mode::Module)
 //!     .map(|tok| tok.expect("Failed to lex"))
 //!     .collect::<Vec<_>>();
 //!
@@ -35,6 +36,8 @@
 pub use super::token::{StringKind, Tok};
 use crate::ast::Location;
 use crate::error::{LexicalError, LexicalErrorType};
+use crate::mode::Mode;
+use crate::soft_keywords::SoftKeywordTransformer;
 use num_bigint::BigInt;
 use num_traits::identities::Zero;
 use num_traits::Num;
@@ -197,27 +200,29 @@ pub type LexResult = Result<Spanned, LexicalError>;
 /// # Examples
 ///
 /// ```
+/// use rustpython_parser::mode::Mode;
 /// use rustpython_parser::lexer::{make_tokenizer};
 ///
 /// let source = "def hello(): return 'world'";
-/// let tokenizer = make_tokenizer(source);
+/// let tokenizer = make_tokenizer(source, Mode::Module);
 ///
 /// for token in tokenizer {
 ///     println!("{:?}", token);
 /// }
 /// ```
 #[inline]
-pub fn make_tokenizer(source: &str) -> impl Iterator<Item = LexResult> + '_ {
-    make_tokenizer_located(source, Location::default())
+pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
+    make_tokenizer_located(source, mode, Location::default())
 }

 /// Create a new tokenizer from a source string, starting at a given location.
 /// You probably want to use [`make_tokenizer`] instead.
 pub fn make_tokenizer_located(
     source: &str,
+    mode: Mode,
     start_location: Location,
 ) -> impl Iterator<Item = LexResult> + '_ {
-    Lexer::new(source.chars(), start_location)
+    SoftKeywordTransformer::new(Lexer::new(source.chars(), start_location), mode)
 }

 impl<T> Lexer<T>
@@ -1210,6 +1215,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::{make_tokenizer, StringKind, Tok};
+    use crate::mode::Mode;
     use num_bigint::BigInt;

     const WINDOWS_EOL: &str = "\r\n";
@@ -1217,7 +1223,7 @@ mod tests {
     const UNIX_EOL: &str = "\n";

     pub fn lex_source(source: &str) -> Vec<Tok> {
-        let lexer = make_tokenizer(source);
+        let lexer = make_tokenizer(source, Mode::Module);
         lexer.map(|x| x.unwrap().1).collect()
     }

lib.rs

@@ -66,27 +66,29 @@
 //! For example, to get a stream of tokens from a given string, one could do this:
 //!
 //! ```
+//! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::lexer::make_tokenizer;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let mut tokens = make_tokenizer(python_source);
+//! let mut tokens = make_tokenizer(python_source, Mode::Module);
 //! assert!(tokens.all(|t| t.is_ok()));
 //! ```
 //!
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::parser::{parse_tokens, Mode};
 //! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::mode::Mode;
+//! use rustpython_parser::parser::parse_tokens;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let tokens = make_tokenizer(python_source);
+//! let tokens = make_tokenizer(python_source, Mode::Module);
 //! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
 //!
 //! assert!(ast.is_ok());
@@ -131,5 +133,5 @@ mod string;
 #[rustfmt::skip]
 mod python;
 mod context;
-pub mod soft_keywords;
+mod soft_keywords;
 pub mod token;

parser.rs

@@ -14,7 +14,6 @@
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;
-use crate::soft_keywords::SoftKeywordTransformer;
 use crate::{ast, error::ParseError, lexer, python};
 use ast::Location;
 use itertools::Itertools;
@@ -107,7 +106,8 @@ pub fn parse_expression_located(
 /// parsing:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse, Mode};
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse;
 ///
 /// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
@@ -116,7 +116,8 @@ pub fn parse_expression_located(
 /// Alternatively, we can parse a full Python program consisting of multiple lines:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse, Mode};
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse;
 ///
 /// let source = r#"
 /// class Greeter:
@@ -139,8 +140,9 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError>
 /// # Example
 ///
 /// ```
-/// use rustpython_parser::parser::{parse_located, Mode};
 /// use rustpython_parser::ast::Location;
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse_located;
 ///
 /// let source = r#"
 /// def fib(i):
@@ -160,7 +162,7 @@ pub fn parse_located(
     source_path: &str,
     location: Location,
 ) -> Result<ast::Mod, ParseError> {
-    let lxr = lexer::make_tokenizer_located(source, location);
+    let lxr = lexer::make_tokenizer_located(source, mode, location);
     parse_tokens(lxr, mode, source_path)
 }
@@ -174,10 +176,11 @@ pub fn parse_located(
 /// them using the [`lexer::make_tokenizer`] function:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse_tokens, Mode};
 /// use rustpython_parser::lexer::make_tokenizer;
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse_tokens;
 ///
-/// let expr = parse_tokens(make_tokenizer("1 + 2"), Mode::Expression, "<embedded>");
+/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
 /// ```
 pub fn parse_tokens(
@@ -190,7 +193,7 @@ pub fn parse_tokens(
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));

     python::TopParser::new()
-        .parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
+        .parse(tokenizer.into_iter())
         .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
 }
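Callers that lex and parse in separate steps now get soft-keyword handling from the lexer itself; a small sketch assuming only the signatures visible in this diff, with Location::default() as the start position:

use rustpython_parser::ast::Location;
use rustpython_parser::lexer::make_tokenizer_located;
use rustpython_parser::mode::Mode;
use rustpython_parser::parser::parse_tokens;

fn main() {
    // No explicit SoftKeywordTransformer wrapping is needed anymore; the
    // lexer already returns a transformed token stream.
    let lxr = make_tokenizer_located("x = 1", Mode::Module, Location::default());
    let ast = parse_tokens(lxr, Mode::Module, "<embedded>");
    assert!(ast.is_ok());
}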

soft_keywords.rs

@@ -19,8 +19,8 @@ pub struct SoftKeywordTransformer<I>
 where
     I: Iterator<Item = LexResult>,
 {
-    pub underlying: MultiPeek<I>,
-    pub start_of_line: bool,
+    underlying: MultiPeek<I>,
+    start_of_line: bool,
 }

 impl<I> SoftKeywordTransformer<I>
@@ -84,14 +84,18 @@ where
         self.start_of_line = next.as_ref().map_or(false, |lex_result| {
             lex_result.as_ref().map_or(false, |(_, tok, _)| {
-                matches!(
-                    tok,
-                    Tok::StartModule
-                        | Tok::StartInteractive
-                        | Tok::Newline
-                        | Tok::Indent
-                        | Tok::Dedent
-                )
+                if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
+                    self.start_of_line
+                } else {
+                    matches!(
+                        tok,
+                        Tok::StartModule
+                            | Tok::StartInteractive
+                            | Tok::Newline
+                            | Tok::Indent
+                            | Tok::Dedent
+                    )
+                }
             })
         });
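The start_of_line change follows from the move: the transformer used to run on a stream the parser had already stripped of Tok::Comment and Tok::NonLogicalNewline, whereas it now wraps the raw lexer output, so those tokens have to be treated as transparent when tracking whether the next token begins a logical line. A minimal sketch of the intended behavior (the source string is illustrative):

use rustpython_parser::mode::Mode;
use rustpython_parser::parser::parse;

fn main() {
    // A soft keyword that follows a comment-only line should still be
    // recognized as a statement keyword, because comments and non-logical
    // newlines no longer reset the transformer's start-of-line tracking.
    let source = "# comment\nmatch x:\n    case _:\n        pass\n";
    assert!(parse(source, Mode::Module, "<embedded>").is_ok());
}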