Mirror of https://github.com/RustPython/Parser.git (synced 2025-07-12 15:45:22 +00:00)
Always wrap in SoftKeywordTransformer
commit 62d88e0b86
parent 82b91fe9aa

4 changed files with 43 additions and 28 deletions
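With this change, every tokenizer the crate hands out is already wrapped in `SoftKeywordTransformer`, and callers pass a `Mode` when constructing it; `parse_tokens` no longer does the wrapping itself. A minimal sketch of the updated call sites, using only the API shown in the doc examples of the diff below:

```rust
use rustpython_parser::lexer::make_tokenizer;
use rustpython_parser::mode::Mode;
use rustpython_parser::parser::parse_tokens;

fn main() {
    let source = "x = 'RustPython'";

    // The lexer now takes a Mode and already includes the soft-keyword
    // handling, so no caller-side SoftKeywordTransformer is needed.
    let tokens = make_tokenizer(source, Mode::Module)
        .map(|tok| tok.expect("Failed to lex"))
        .collect::<Vec<_>>();
    println!("{:?}", tokens);

    // Tokens can be fed straight into the parser as before; parse_tokens
    // does not wrap the stream a second time.
    let ast = parse_tokens(make_tokenizer(source, Mode::Module), Mode::Module, "<embedded>");
    assert!(ast.is_ok());
}
```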
lexer.rs
@@ -13,10 +13,11 @@
 //!
 //! ```
 //! use rustpython_parser::lexer::{make_tokenizer, Tok};
+//! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::token::StringKind;
 //!
 //! let source = "x = 'RustPython'";
-//! let tokens = make_tokenizer(source)
+//! let tokens = make_tokenizer(source, Mode::Module)
 //!     .map(|tok| tok.expect("Failed to lex"))
 //!     .collect::<Vec<_>>();
 //!
@@ -35,6 +36,8 @@
 pub use super::token::{StringKind, Tok};
 use crate::ast::Location;
 use crate::error::{LexicalError, LexicalErrorType};
+use crate::mode::Mode;
+use crate::soft_keywords::SoftKeywordTransformer;
 use num_bigint::BigInt;
 use num_traits::identities::Zero;
 use num_traits::Num;
@@ -197,27 +200,29 @@ pub type LexResult = Result<Spanned, LexicalError>;
 /// # Examples
 ///
 /// ```
+/// use rustpython_parser::mode::Mode;
 /// use rustpython_parser::lexer::{make_tokenizer};
 ///
 /// let source = "def hello(): return 'world'";
-/// let tokenizer = make_tokenizer(source);
+/// let tokenizer = make_tokenizer(source, Mode::Module);
 ///
 /// for token in tokenizer {
 ///     println!("{:?}", token);
 /// }
 /// ```
 #[inline]
-pub fn make_tokenizer(source: &str) -> impl Iterator<Item = LexResult> + '_ {
-    make_tokenizer_located(source, Location::default())
+pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
+    make_tokenizer_located(source, mode, Location::default())
 }
 
 /// Create a new tokenizer from a source string, starting at a given location.
 /// You probably want to use [`make_tokenizer`] instead.
 pub fn make_tokenizer_located(
     source: &str,
+    mode: Mode,
     start_location: Location,
 ) -> impl Iterator<Item = LexResult> + '_ {
-    Lexer::new(source.chars(), start_location)
+    SoftKeywordTransformer::new(Lexer::new(source.chars(), start_location), mode)
 }
 
 impl<T> Lexer<T>
@@ -1210,6 +1215,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::{make_tokenizer, StringKind, Tok};
+    use crate::mode::Mode;
     use num_bigint::BigInt;
 
     const WINDOWS_EOL: &str = "\r\n";
@@ -1217,7 +1223,7 @@ mod tests {
     const UNIX_EOL: &str = "\n";
 
     pub fn lex_source(source: &str) -> Vec<Tok> {
-        let lexer = make_tokenizer(source);
+        let lexer = make_tokenizer(source, Mode::Module);
         lexer.map(|x| x.unwrap().1).collect()
     }
 
lib.rs
@@ -66,27 +66,29 @@
 //! For example, to get a stream of tokens from a given string, one could do this:
 //!
 //! ```
+//! use rustpython_parser::mode::Mode;
 //! use rustpython_parser::lexer::make_tokenizer;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let mut tokens = make_tokenizer(python_source);
+//! let mut tokens = make_tokenizer(python_source, Mode::Module);
 //! assert!(tokens.all(|t| t.is_ok()));
 //! ```
 //!
 //! These tokens can be directly fed into the parser to generate an AST:
 //!
 //! ```
-//! use rustpython_parser::parser::{parse_tokens, Mode};
 //! use rustpython_parser::lexer::make_tokenizer;
+//! use rustpython_parser::mode::Mode;
+//! use rustpython_parser::parser::parse_tokens;
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!     return bool(i & 1)
 //! "#;
-//! let tokens = make_tokenizer(python_source);
+//! let tokens = make_tokenizer(python_source, Mode::Module);
 //! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
 //!
 //! assert!(ast.is_ok());
@@ -131,5 +133,5 @@ mod string;
 #[rustfmt::skip]
 mod python;
 mod context;
-pub mod soft_keywords;
+mod soft_keywords;
 pub mod token;
parser.rs
@@ -14,7 +14,6 @@
 
 use crate::lexer::{LexResult, Tok};
 pub use crate::mode::Mode;
-use crate::soft_keywords::SoftKeywordTransformer;
 use crate::{ast, error::ParseError, lexer, python};
 use ast::Location;
 use itertools::Itertools;
@@ -107,7 +106,8 @@ pub fn parse_expression_located(
 /// parsing:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse, Mode};
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse;
 ///
 /// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
@@ -116,7 +116,8 @@ pub fn parse_expression_located(
 /// Alternatively, we can parse a full Python program consisting of multiple lines:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse, Mode};
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse;
 ///
 /// let source = r#"
 /// class Greeter:
@@ -139,8 +140,9 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, Pa
 /// # Example
 ///
 /// ```
-/// use rustpython_parser::parser::{parse_located, Mode};
 /// use rustpython_parser::ast::Location;
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse_located;
 ///
 /// let source = r#"
 /// def fib(i):
@@ -160,7 +162,7 @@ pub fn parse_located(
     source_path: &str,
     location: Location,
 ) -> Result<ast::Mod, ParseError> {
-    let lxr = lexer::make_tokenizer_located(source, location);
+    let lxr = lexer::make_tokenizer_located(source, mode, location);
     parse_tokens(lxr, mode, source_path)
 }
 
@@ -174,10 +176,11 @@ pub fn parse_located(
 /// them using the [`lexer::make_tokenizer`] function:
 ///
 /// ```
-/// use rustpython_parser::parser::{parse_tokens, Mode};
 /// use rustpython_parser::lexer::make_tokenizer;
+/// use rustpython_parser::mode::Mode;
+/// use rustpython_parser::parser::parse_tokens;
 ///
-/// let expr = parse_tokens(make_tokenizer("1 + 2"), Mode::Expression, "<embedded>");
+/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
 /// assert!(expr.is_ok());
 /// ```
 pub fn parse_tokens(
@@ -190,7 +193,7 @@ pub fn parse_tokens(
         .chain(lxr)
         .filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
     python::TopParser::new()
-        .parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
+        .parse(tokenizer.into_iter())
         .map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
 }
 
soft_keywords.rs
@@ -19,8 +19,8 @@ pub struct SoftKeywordTransformer<I>
 where
     I: Iterator<Item = LexResult>,
 {
-    pub underlying: MultiPeek<I>,
-    pub start_of_line: bool,
+    underlying: MultiPeek<I>,
+    start_of_line: bool,
 }
 
 impl<I> SoftKeywordTransformer<I>
@@ -84,6 +84,9 @@ where
 
         self.start_of_line = next.as_ref().map_or(false, |lex_result| {
            lex_result.as_ref().map_or(false, |(_, tok, _)| {
+                if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
+                    self.start_of_line
+                } else {
                 matches!(
                     tok,
                     Tok::StartModule
@@ -92,6 +95,7 @@ where
                     | Tok::Indent
                     | Tok::Dedent
                 )
+                }
             })
         });
 