make_tokenizer -> lex to integrate terms

we don't distinguish scanner or tokenizer from lexer
Jeong YunWon 2023-02-22 15:58:51 +09:00
parent 39fc23cf92
commit 8580e4ebb5
4 changed files with 27 additions and 27 deletions
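For downstream callers, a hedged before/after sketch of the rename, using only the names and signatures that appear in the diff below (`lex(source, Mode::Module)` returning an iterator of `Result<Spanned, LexicalError>`); the `main` wrapper is illustrative:

    use rustpython_parser::lexer::lex; // previously: rustpython_parser::lexer::make_tokenizer
    use rustpython_parser::mode::Mode;

    fn main() {
        // Before this commit: `let tokens = make_tokenizer(source, Mode::Module);`
        let source = "def hello(): return 'world'";
        for token in lex(source, Mode::Module) {
            // Each item is a `Result<Spanned, LexicalError>`.
            println!("{:?}", token);
        }
    }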

View file

@@ -4,7 +4,7 @@
 //! governing what is and is not a valid token are defined in the Python reference
 //! guide section on [Lexical analysis].
 //!
-//! The primary function in this module is [`make_tokenizer`], which takes a string slice
+//! The primary function in this module is [`lex`], which takes a string slice
 //! and returns an iterator over the tokens in the source code. The tokens are currently returned
 //! as a `Result<Spanned, LexicalError>`, where [`Spanned`] is a tuple containing the
 //! start and end [`Location`] and a [`Tok`] denoting the token.
@@ -12,12 +12,12 @@
 //! # Example
 //!
 //! ```
-//! use rustpython_parser::lexer::{make_tokenizer, Tok};
+//! use rustpython_parser::lexer::{lex, Tok};
 //! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::token::StringKind;
 //!
 //! let source = "x = 'RustPython'";
-//! let tokens = make_tokenizer(source, Mode::Module)
+//! let tokens = lex(source, Mode::Module)
 //!     .map(|tok| tok.expect("Failed to lex"))
 //!     .collect::<Vec<_>>();
 //!
@@ -195,29 +195,29 @@ pub type Spanned = (Location, Tok, Location);
 /// The result of lexing a token.
 pub type LexResult = Result<Spanned, LexicalError>;
 
-/// Create a new tokenizer from a source string.
+/// Create a new lexer from a source string.
 ///
 /// # Examples
 ///
 /// ```
 /// use rustpython_parser::mode::Mode;
-/// use rustpython_parser::lexer::{make_tokenizer};
+/// use rustpython_parser::lexer::{lex};
 ///
 /// let source = "def hello(): return 'world'";
-/// let tokenizer = make_tokenizer(source, Mode::Module);
+/// let lexer = lex(source, Mode::Module);
 ///
-/// for token in tokenizer {
+/// for token in lexer {
 ///     println!("{:?}", token);
 /// }
 /// ```
 #[inline]
-pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
-    make_tokenizer_located(source, mode, Location::default())
+pub fn lex(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
+    lex_located(source, mode, Location::default())
 }
 
-/// Create a new tokenizer from a source string, starting at a given location.
-/// You probably want to use [`make_tokenizer`] instead.
-pub fn make_tokenizer_located(
+/// Create a new lexer from a source string, starting at a given location.
+/// You probably want to use [`lex`] instead.
+pub fn lex_located(
     source: &str,
     mode: Mode,
     start_location: Location,
@@ -230,7 +230,7 @@ where
     T: Iterator<Item = char>,
 {
     /// Create a new lexer from T and a starting location. You probably want to use
-    /// [`make_tokenizer`] instead.
+    /// [`lex`] instead.
     pub fn new(input: T, start: Location) -> Self {
         let mut lxr = Lexer {
             at_begin_of_line: true,
@@ -1320,7 +1320,7 @@ impl std::fmt::Display for LexicalErrorType {
 
 #[cfg(test)]
 mod tests {
-    use super::{make_tokenizer, StringKind, Tok};
+    use super::{lex, StringKind, Tok};
     use crate::mode::Mode;
     use num_bigint::BigInt;
 
@@ -1329,7 +1329,7 @@ mod tests {
     const UNIX_EOL: &str = "\n";
 
     pub fn lex_source(source: &str) -> Vec<Tok> {
-        let lexer = make_tokenizer(source, Mode::Module);
+        let lexer = lex(source, Mode::Module);
         lexer.map(|x| x.unwrap().1).collect()
     }
 

View file

@@ -67,20 +67,20 @@
 //!
 //! ```
 //! use rustpython_parser::mode::Mode;
-//! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::lexer::lex;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let mut tokens = make_tokenizer(python_source, Mode::Module);
+//! let mut tokens = lex(python_source, Mode::Module);
 //! assert!(tokens.all(|t| t.is_ok()));
 //! ```
 //!
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::lexer::lex;
 //! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::parser::parse_tokens;
 //!
@@ -88,7 +88,7 @@
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let tokens = make_tokenizer(python_source, Mode::Module);
+//! let tokens = lex(python_source, Mode::Module);
 //! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
 //!
 //! assert!(ast.is_ok());
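As a usage note, the two crate-level doc examples updated above compose into a single pipeline; a minimal sketch using only the names shown in this diff (`lex`, `parse_tokens`, `Mode::Module`, and the "<embedded>" source path):

    use rustpython_parser::lexer::lex;
    use rustpython_parser::mode::Mode;
    use rustpython_parser::parser::parse_tokens;

    fn main() {
        // Lex a tiny module, then feed the token stream straight to the parser.
        let python_source = "def is_odd(i):\n    return bool(i & 1)\n";
        let tokens = lex(python_source, Mode::Module);
        let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
        assert!(ast.is_ok());
    }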

View file

@@ -164,7 +164,7 @@ pub fn parse_located(
     source_path: &str,
     location: Location,
 ) -> Result<ast::Mod, ParseError> {
-    let lxr = lexer::make_tokenizer_located(source, mode, location);
+    let lxr = lexer::lex_located(source, mode, location);
     parse_tokens(lxr, mode, source_path)
 }
 
@@ -175,14 +175,14 @@ pub fn parse_located(
 /// # Example
 ///
 /// As an example, instead of parsing a string, we can parse a list of tokens after we generate
-/// them using the [`lexer::make_tokenizer`] function:
+/// them using the [`lexer::lex`] function:
 ///
 /// ```
-/// use rustpython_parser::lexer::make_tokenizer;
+/// use rustpython_parser::lexer::lex;
 /// use rustpython_parser::mode::Mode;
 /// use rustpython_parser::parser::parse_tokens;
 ///
-/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
+/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
 /// ```
 pub fn parse_tokens(
@@ -191,11 +191,11 @@ pub fn parse_tokens(
     source_path: &str,
 ) -> Result<ast::Mod, ParseError> {
     let marker_token = (Default::default(), mode.to_marker(), Default::default());
-    let tokenizer = iter::once(Ok(marker_token))
+    let lexer = iter::once(Ok(marker_token))
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
 
     python::TopParser::new()
-        .parse(tokenizer.into_iter())
+        .parse(lexer.into_iter())
         .map_err(|e| parse_error_from_lalrpop(e, source_path))
 }
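The same entry points also work in expression mode, as the updated `parse_tokens` doc example above shows; a runnable form of that one-liner, assuming nothing beyond what the diff itself documents:

    use rustpython_parser::lexer::lex;
    use rustpython_parser::mode::Mode;
    use rustpython_parser::parser::parse_tokens;

    fn main() {
        // Expression mode parses a single expression rather than a whole module.
        let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
        assert!(expr.is_ok());
    }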

View file

@@ -27,9 +27,9 @@ impl<I> SoftKeywordTransformer<I>
 where
     I: Iterator<Item = LexResult>,
 {
-    pub fn new(tokenizer: I, mode: Mode) -> Self {
+    pub fn new(lexer: I, mode: Mode) -> Self {
         Self {
-            underlying: tokenizer.multipeek(),
+            underlying: lexer.multipeek(),
             start_of_line: matches!(mode, Mode::Interactive | Mode::Module),
         }
     }