mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-09 22:25:23 +00:00
Add full-lexer feature (#36)
This commit is contained in:
parent
dd4cc25227
commit
27e3873dc2
7 changed files with 1216 additions and 1188 deletions
|
@ -13,6 +13,7 @@ default = ["location"]
|
||||||
location = ["rustpython-ast/location", "rustpython-parser-core/location"]
|
location = ["rustpython-ast/location", "rustpython-parser-core/location"]
|
||||||
serde = ["dep:serde", "rustpython-parser-core/serde"]
|
serde = ["dep:serde", "rustpython-parser-core/serde"]
|
||||||
all-nodes-with-ranges = ["rustpython-ast/all-nodes-with-ranges"]
|
all-nodes-with-ranges = ["rustpython-ast/all-nodes-with-ranges"]
|
||||||
|
full-lexer = []
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
|
|
|
@ -450,6 +450,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lex a single comment.
|
/// Lex a single comment.
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
fn lex_comment(&mut self) -> LexResult {
|
fn lex_comment(&mut self) -> LexResult {
|
||||||
let start_pos = self.get_pos();
|
let start_pos = self.get_pos();
|
||||||
let mut value = String::new();
|
let mut value = String::new();
|
||||||
|
@ -465,6 +466,20 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Discard comment if full-lexer is not enabled.
///
/// Advances the lexer with `next_char` until `window[0]` is `'\n'`, `'\r'`,
/// or `None`, then returns. The terminating character is not consumed here,
/// and the function returns no value.
|
||||||
|
#[cfg(not(feature = "full-lexer"))]
|
||||||
|
fn lex_comment(&mut self) {
|
||||||
|
loop {
|
||||||
|
match self.window[0] {
|
||||||
|
// Stop at a line terminator or when no character is left; it is left in place.
Some('\n' | '\r') | None => {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Any other character belongs to the comment and is skipped below.
Some(_) => {}
|
||||||
|
}
|
||||||
|
// `window[0]` matched `Some(_)` above; presumably `next_char` yields that
// character, which is why the unwrap is expected not to panic (confirm).
self.next_char().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Lex a string literal.
|
/// Lex a string literal.
|
||||||
fn lex_string(&mut self, kind: StringKind) -> LexResult {
|
fn lex_string(&mut self, kind: StringKind) -> LexResult {
|
||||||
let start_pos = self.get_pos();
|
let start_pos = self.get_pos();
|
||||||
|
@ -611,8 +626,9 @@ where
|
||||||
tabs += 1;
|
tabs += 1;
|
||||||
}
|
}
|
||||||
Some('#') => {
|
Some('#') => {
|
||||||
let comment = self.lex_comment()?;
|
let _comment = self.lex_comment();
|
||||||
self.emit(comment);
|
#[cfg(feature = "full-lexer")]
|
||||||
|
self.emit(_comment?);
|
||||||
spaces = 0;
|
spaces = 0;
|
||||||
tabs = 0;
|
tabs = 0;
|
||||||
}
|
}
|
||||||
|
@ -753,8 +769,9 @@ where
|
||||||
self.emit(number);
|
self.emit(number);
|
||||||
}
|
}
|
||||||
'#' => {
|
'#' => {
|
||||||
let comment = self.lex_comment()?;
|
let _comment = self.lex_comment();
|
||||||
self.emit(comment);
|
#[cfg(feature = "full-lexer")]
|
||||||
|
self.emit(_comment?);
|
||||||
}
|
}
|
||||||
'"' | '\'' => {
|
'"' | '\'' => {
|
||||||
let string = self.lex_string(StringKind::String)?;
|
let string = self.lex_string(StringKind::String)?;
|
||||||
|
@ -1101,6 +1118,7 @@ where
|
||||||
self.at_begin_of_line = true;
|
self.at_begin_of_line = true;
|
||||||
self.emit((Tok::Newline, TextRange::new(tok_start, tok_end)));
|
self.emit((Tok::Newline, TextRange::new(tok_start, tok_end)));
|
||||||
} else {
|
} else {
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
self.emit((Tok::NonLogicalNewline, TextRange::new(tok_start, tok_end)));
|
self.emit((Tok::NonLogicalNewline, TextRange::new(tok_start, tok_end)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1408,6 +1426,7 @@ mod tests {
|
||||||
($($name:ident: $eol:expr,)*) => {
|
($($name:ident: $eol:expr,)*) => {
|
||||||
$(
|
$(
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
let source = format!(r"99232 # {}", $eol);
|
let source = format!(r"99232 # {}", $eol);
|
||||||
let tokens = lex_source(&source);
|
let tokens = lex_source(&source);
|
||||||
|
@ -1428,6 +1447,7 @@ mod tests {
|
||||||
($($name:ident: $eol:expr,)*) => {
|
($($name:ident: $eol:expr,)*) => {
|
||||||
$(
|
$(
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
let source = format!("123 # Foo{}456", $eol);
|
let source = format!("123 # Foo{}456", $eol);
|
||||||
let tokens = lex_source(&source);
|
let tokens = lex_source(&source);
|
||||||
|
@ -1607,6 +1627,7 @@ mod tests {
|
||||||
($($name:ident: $eol:expr,)*) => {
|
($($name:ident: $eol:expr,)*) => {
|
||||||
$(
|
$(
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
let source = r"x = [
|
let source = r"x = [
|
||||||
|
|
||||||
|
@ -1669,6 +1690,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
fn test_non_logical_newline_in_string_continuation() {
|
fn test_non_logical_newline_in_string_continuation() {
|
||||||
let source = r"(
|
let source = r"(
|
||||||
'a'
|
'a'
|
||||||
|
@ -1698,6 +1720,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
fn test_logical_newline_line_comment() {
|
fn test_logical_newline_line_comment() {
|
||||||
let source = "#Hello\n#World";
|
let source = "#Hello\n#World";
|
||||||
let tokens = lex_source(source);
|
let tokens = lex_source(source);
|
||||||
|
|
|
@ -190,9 +190,10 @@ pub fn parse_tokens(
|
||||||
source_path: &str,
|
source_path: &str,
|
||||||
) -> Result<ast::Mod, ParseError> {
|
) -> Result<ast::Mod, ParseError> {
|
||||||
let marker_token = (Tok::start_marker(mode), Default::default());
|
let marker_token = (Tok::start_marker(mode), Default::default());
|
||||||
let lexer = iter::once(Ok(marker_token))
|
let lexer = iter::once(Ok(marker_token)).chain(lxr);
|
||||||
.chain(lxr)
|
#[cfg(feature = "full-lexer")]
|
||||||
.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
let lexer =
|
||||||
|
lexer.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||||
python::TopParser::new()
|
python::TopParser::new()
|
||||||
.parse(
|
.parse(
|
||||||
lexer
|
lexer
|
||||||
|
|
|
@ -1743,6 +1743,6 @@ extern {
|
||||||
name => token::Tok::Name { name: <String> },
|
name => token::Tok::Name { name: <String> },
|
||||||
"\n" => token::Tok::Newline,
|
"\n" => token::Tok::Newline,
|
||||||
";" => token::Tok::Semi,
|
";" => token::Tok::Semi,
|
||||||
"#" => token::Tok::Comment(_),
|
// "#" => token::Tok::Comment(_),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
2338
parser/src/python.rs
generated
2338
parser/src/python.rs
generated
File diff suppressed because it is too large
Load diff
|
@ -86,18 +86,19 @@ where
|
||||||
|
|
||||||
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
|
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
|
||||||
lex_result.as_ref().map_or(false, |(tok, _)| {
|
lex_result.as_ref().map_or(false, |(tok, _)| {
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
|
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
|
||||||
self.start_of_line
|
return self.start_of_line;
|
||||||
} else {
|
|
||||||
matches!(
|
|
||||||
tok,
|
|
||||||
Tok::StartModule
|
|
||||||
| Tok::StartInteractive
|
|
||||||
| Tok::Newline
|
|
||||||
| Tok::Indent
|
|
||||||
| Tok::Dedent
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
matches!(
|
||||||
|
tok,
|
||||||
|
Tok::StartModule
|
||||||
|
| Tok::StartInteractive
|
||||||
|
| Tok::Newline
|
||||||
|
| Tok::Indent
|
||||||
|
| Tok::Dedent
|
||||||
|
)
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -43,11 +43,13 @@ pub enum Tok {
|
||||||
triple_quoted: bool,
|
triple_quoted: bool,
|
||||||
},
|
},
|
||||||
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
|
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
Comment(String),
|
Comment(String),
|
||||||
/// Token value for a newline.
|
/// Token value for a newline.
|
||||||
Newline,
|
Newline,
|
||||||
/// Token value for a newline that is not a logical line break. These are filtered out of
|
/// Token value for a newline that is not a logical line break. These are filtered out of
|
||||||
/// the token stream prior to parsing.
|
/// the token stream prior to parsing.
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
NonLogicalNewline,
|
NonLogicalNewline,
|
||||||
/// Token value for an indent.
|
/// Token value for an indent.
|
||||||
Indent,
|
Indent,
|
||||||
|
@ -223,6 +225,7 @@ impl fmt::Display for Tok {
|
||||||
write!(f, "{kind}{quotes}{value}{quotes}")
|
write!(f, "{kind}{quotes}{value}{quotes}")
|
||||||
}
|
}
|
||||||
Newline => f.write_str("Newline"),
|
Newline => f.write_str("Newline"),
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
NonLogicalNewline => f.write_str("NonLogicalNewline"),
|
NonLogicalNewline => f.write_str("NonLogicalNewline"),
|
||||||
Indent => f.write_str("Indent"),
|
Indent => f.write_str("Indent"),
|
||||||
Dedent => f.write_str("Dedent"),
|
Dedent => f.write_str("Dedent"),
|
||||||
|
@ -236,6 +239,7 @@ impl fmt::Display for Tok {
|
||||||
Rsqb => f.write_str("']'"),
|
Rsqb => f.write_str("']'"),
|
||||||
Colon => f.write_str("':'"),
|
Colon => f.write_str("':'"),
|
||||||
Comma => f.write_str("','"),
|
Comma => f.write_str("','"),
|
||||||
|
#[cfg(feature = "full-lexer")]
|
||||||
Comment(value) => f.write_str(value),
|
Comment(value) => f.write_str(value),
|
||||||
Semi => f.write_str("';'"),
|
Semi => f.write_str("';'"),
|
||||||
Plus => f.write_str("'+'"),
|
Plus => f.write_str("'+'"),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue