Add full-lexer feature (#36)

This commit is contained in:
Jeong, YunWon 2023-05-16 02:21:34 +09:00 committed by GitHub
parent dd4cc25227
commit 27e3873dc2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 1216 additions and 1188 deletions

View file

@ -13,6 +13,7 @@ default = ["location"]
location = ["rustpython-ast/location", "rustpython-parser-core/location"]
serde = ["dep:serde", "rustpython-parser-core/serde"]
all-nodes-with-ranges = ["rustpython-ast/all-nodes-with-ranges"]
full-lexer = []
[build-dependencies]
anyhow = { workspace = true }

View file

@ -450,6 +450,7 @@ where
}
/// Lex a single comment.
#[cfg(feature = "full-lexer")]
fn lex_comment(&mut self) -> LexResult {
let start_pos = self.get_pos();
let mut value = String::new();
@ -465,6 +466,20 @@ where
}
}
/// Skip over a comment without producing a token.
///
/// This variant is compiled only when the `full-lexer` feature is disabled.
/// It advances through the input until `self.window[0]` is `\n`, `\r`, or
/// `None`; that terminating position is left unconsumed for the caller.
#[cfg(not(feature = "full-lexer"))]
fn lex_comment(&mut self) {
    // Keep advancing while the next character exists and is not a line ending.
    while !matches!(self.window[0], Some('\n' | '\r') | None) {
        // The window just reported `Some(_)`, so a character is expected here.
        self.next_char().unwrap();
    }
}
/// Lex a string literal.
fn lex_string(&mut self, kind: StringKind) -> LexResult {
let start_pos = self.get_pos();
@ -611,8 +626,9 @@ where
tabs += 1;
}
Some('#') => {
let comment = self.lex_comment()?;
self.emit(comment);
let _comment = self.lex_comment();
#[cfg(feature = "full-lexer")]
self.emit(_comment?);
spaces = 0;
tabs = 0;
}
@ -753,8 +769,9 @@ where
self.emit(number);
}
'#' => {
let comment = self.lex_comment()?;
self.emit(comment);
let _comment = self.lex_comment();
#[cfg(feature = "full-lexer")]
self.emit(_comment?);
}
'"' | '\'' => {
let string = self.lex_string(StringKind::String)?;
@ -1101,6 +1118,7 @@ where
self.at_begin_of_line = true;
self.emit((Tok::Newline, TextRange::new(tok_start, tok_end)));
} else {
#[cfg(feature = "full-lexer")]
self.emit((Tok::NonLogicalNewline, TextRange::new(tok_start, tok_end)));
}
}
@ -1408,6 +1426,7 @@ mod tests {
($($name:ident: $eol:expr,)*) => {
$(
#[test]
#[cfg(feature = "full-lexer")]
fn $name() {
let source = format!(r"99232 # {}", $eol);
let tokens = lex_source(&source);
@ -1428,6 +1447,7 @@ mod tests {
($($name:ident: $eol:expr,)*) => {
$(
#[test]
#[cfg(feature = "full-lexer")]
fn $name() {
let source = format!("123 # Foo{}456", $eol);
let tokens = lex_source(&source);
@ -1607,6 +1627,7 @@ mod tests {
($($name:ident: $eol:expr,)*) => {
$(
#[test]
#[cfg(feature = "full-lexer")]
fn $name() {
let source = r"x = [
@ -1669,6 +1690,7 @@ mod tests {
}
#[test]
#[cfg(feature = "full-lexer")]
fn test_non_logical_newline_in_string_continuation() {
let source = r"(
'a'
@ -1698,6 +1720,7 @@ mod tests {
}
#[test]
#[cfg(feature = "full-lexer")]
fn test_logical_newline_line_comment() {
let source = "#Hello\n#World";
let tokens = lex_source(source);

View file

@ -190,9 +190,10 @@ pub fn parse_tokens(
source_path: &str,
) -> Result<ast::Mod, ParseError> {
let marker_token = (Tok::start_marker(mode), Default::default());
let lexer = iter::once(Ok(marker_token))
.chain(lxr)
.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
let lexer = iter::once(Ok(marker_token)).chain(lxr);
#[cfg(feature = "full-lexer")]
let lexer =
lexer.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
python::TopParser::new()
.parse(
lexer

View file

@ -1743,6 +1743,6 @@ extern {
name => token::Tok::Name { name: <String> },
"\n" => token::Tok::Newline,
";" => token::Tok::Semi,
"#" => token::Tok::Comment(_),
// "#" => token::Tok::Comment(_),
}
}

2338
parser/src/python.rs generated

File diff suppressed because it is too large Load diff

View file

@ -86,18 +86,19 @@ where
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
lex_result.as_ref().map_or(false, |(tok, _)| {
#[cfg(feature = "full-lexer")]
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
self.start_of_line
} else {
matches!(
tok,
Tok::StartModule
| Tok::StartInteractive
| Tok::Newline
| Tok::Indent
| Tok::Dedent
)
return self.start_of_line;
}
matches!(
tok,
Tok::StartModule
| Tok::StartInteractive
| Tok::Newline
| Tok::Indent
| Tok::Dedent
)
})
});

View file

@ -43,11 +43,13 @@ pub enum Tok {
triple_quoted: bool,
},
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
#[cfg(feature = "full-lexer")]
Comment(String),
/// Token value for a newline.
Newline,
/// Token value for a newline that is not a logical line break. These are filtered out of
/// the token stream prior to parsing.
#[cfg(feature = "full-lexer")]
NonLogicalNewline,
/// Token value for an indent.
Indent,
@ -223,6 +225,7 @@ impl fmt::Display for Tok {
write!(f, "{kind}{quotes}{value}{quotes}")
}
Newline => f.write_str("Newline"),
#[cfg(feature = "full-lexer")]
NonLogicalNewline => f.write_str("NonLogicalNewline"),
Indent => f.write_str("Indent"),
Dedent => f.write_str("Dedent"),
@ -236,6 +239,7 @@ impl fmt::Display for Tok {
Rsqb => f.write_str("']'"),
Colon => f.write_str("':'"),
Comma => f.write_str("','"),
#[cfg(feature = "full-lexer")]
Comment(value) => f.write_str(value),
Semi => f.write_str("';'"),
Plus => f.write_str("'+'"),