mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-08 05:35:22 +00:00
full-lexer feature
This commit is contained in:
parent
dd4cc25227
commit
53d7d606a6
7 changed files with 1216 additions and 1188 deletions
|
@ -13,6 +13,7 @@ default = ["location"]
|
|||
location = ["rustpython-ast/location", "rustpython-parser-core/location"]
|
||||
serde = ["dep:serde", "rustpython-parser-core/serde"]
|
||||
all-nodes-with-ranges = ["rustpython-ast/all-nodes-with-ranges"]
|
||||
full-lexer = []
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { workspace = true }
|
||||
|
|
|
@ -450,6 +450,7 @@ where
|
|||
}
|
||||
|
||||
/// Lex a single comment.
|
||||
#[cfg(feature = "full-lexer")]
|
||||
fn lex_comment(&mut self) -> LexResult {
|
||||
let start_pos = self.get_pos();
|
||||
let mut value = String::new();
|
||||
|
@ -465,6 +466,20 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Skip over a comment without producing a token.
///
/// This variant is compiled only when the `full-lexer` feature is disabled.
/// It advances the lexer one character at a time until the character at the
/// front of the lookahead window is `\n`, `\r`, or the end of input (`None`),
/// then returns. The line terminator itself is not consumed here.
|
||||
#[cfg(not(feature = "full-lexer"))]
|
||||
fn lex_comment(&mut self) {
|
||||
loop {
|
||||
match self.window[0] {
|
||||
Some('\n' | '\r') | None => {
|
||||
return;
|
||||
|
||||
}
|
||||
Some(_) => {}
|
||||
}
|
||||
// NOTE(review): the match above just saw `Some(_)` in `window[0]`, so this `unwrap` presumably cannot fail — confirm against `next_char`.
self.next_char().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a string literal.
|
||||
fn lex_string(&mut self, kind: StringKind) -> LexResult {
|
||||
let start_pos = self.get_pos();
|
||||
|
@ -611,8 +626,9 @@ where
|
|||
tabs += 1;
|
||||
}
|
||||
Some('#') => {
|
||||
let comment = self.lex_comment()?;
|
||||
self.emit(comment);
|
||||
let _comment = self.lex_comment();
|
||||
#[cfg(feature = "full-lexer")]
|
||||
self.emit(_comment?);
|
||||
spaces = 0;
|
||||
tabs = 0;
|
||||
}
|
||||
|
@ -753,8 +769,9 @@ where
|
|||
self.emit(number);
|
||||
}
|
||||
'#' => {
|
||||
let comment = self.lex_comment()?;
|
||||
self.emit(comment);
|
||||
let _comment = self.lex_comment();
|
||||
#[cfg(feature = "full-lexer")]
|
||||
self.emit(_comment?);
|
||||
}
|
||||
'"' | '\'' => {
|
||||
let string = self.lex_string(StringKind::String)?;
|
||||
|
@ -1101,6 +1118,7 @@ where
|
|||
self.at_begin_of_line = true;
|
||||
self.emit((Tok::Newline, TextRange::new(tok_start, tok_end)));
|
||||
} else {
|
||||
#[cfg(feature = "full-lexer")]
|
||||
self.emit((Tok::NonLogicalNewline, TextRange::new(tok_start, tok_end)));
|
||||
}
|
||||
}
|
||||
|
@ -1408,6 +1426,7 @@ mod tests {
|
|||
($($name:ident: $eol:expr,)*) => {
|
||||
$(
|
||||
#[test]
|
||||
#[cfg(feature = "full-lexer")]
|
||||
fn $name() {
|
||||
let source = format!(r"99232 # {}", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
|
@ -1428,6 +1447,7 @@ mod tests {
|
|||
($($name:ident: $eol:expr,)*) => {
|
||||
$(
|
||||
#[test]
|
||||
#[cfg(feature = "full-lexer")]
|
||||
fn $name() {
|
||||
let source = format!("123 # Foo{}456", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
|
@ -1607,6 +1627,7 @@ mod tests {
|
|||
($($name:ident: $eol:expr,)*) => {
|
||||
$(
|
||||
#[test]
|
||||
#[cfg(feature = "full-lexer")]
|
||||
fn $name() {
|
||||
let source = r"x = [
|
||||
|
||||
|
@ -1669,6 +1690,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "full-lexer")]
|
||||
fn test_non_logical_newline_in_string_continuation() {
|
||||
let source = r"(
|
||||
'a'
|
||||
|
@ -1698,6 +1720,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "full-lexer")]
|
||||
fn test_logical_newline_line_comment() {
|
||||
let source = "#Hello\n#World";
|
||||
let tokens = lex_source(source);
|
||||
|
|
|
@ -190,9 +190,10 @@ pub fn parse_tokens(
|
|||
source_path: &str,
|
||||
) -> Result<ast::Mod, ParseError> {
|
||||
let marker_token = (Tok::start_marker(mode), Default::default());
|
||||
let lexer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
let lexer = iter::once(Ok(marker_token)).chain(lxr);
|
||||
#[cfg(feature = "full-lexer")]
|
||||
let lexer =
|
||||
lexer.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
python::TopParser::new()
|
||||
.parse(
|
||||
lexer
|
||||
|
|
|
@ -1743,6 +1743,6 @@ extern {
|
|||
name => token::Tok::Name { name: <String> },
|
||||
"\n" => token::Tok::Newline,
|
||||
";" => token::Tok::Semi,
|
||||
"#" => token::Tok::Comment(_),
|
||||
// "#" => token::Tok::Comment(_),
|
||||
}
|
||||
}
|
||||
|
|
2338
parser/src/python.rs
generated
2338
parser/src/python.rs
generated
File diff suppressed because it is too large
Load diff
|
@ -86,18 +86,19 @@ where
|
|||
|
||||
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
|
||||
lex_result.as_ref().map_or(false, |(tok, _)| {
|
||||
#[cfg(feature = "full-lexer")]
|
||||
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
|
||||
self.start_of_line
|
||||
} else {
|
||||
matches!(
|
||||
tok,
|
||||
Tok::StartModule
|
||||
| Tok::StartInteractive
|
||||
| Tok::Newline
|
||||
| Tok::Indent
|
||||
| Tok::Dedent
|
||||
)
|
||||
return self.start_of_line;
|
||||
}
|
||||
|
||||
matches!(
|
||||
tok,
|
||||
Tok::StartModule
|
||||
| Tok::StartInteractive
|
||||
| Tok::Newline
|
||||
| Tok::Indent
|
||||
| Tok::Dedent
|
||||
)
|
||||
})
|
||||
});
|
||||
|
||||
|
|
|
@ -43,11 +43,13 @@ pub enum Tok {
|
|||
triple_quoted: bool,
|
||||
},
|
||||
/// Token value for a comment. These are filtered out of the token stream prior to parsing.
|
||||
#[cfg(feature = "full-lexer")]
|
||||
Comment(String),
|
||||
/// Token value for a newline.
|
||||
Newline,
|
||||
/// Token value for a newline that is not a logical line break. These are filtered out of
|
||||
/// the token stream prior to parsing.
|
||||
#[cfg(feature = "full-lexer")]
|
||||
NonLogicalNewline,
|
||||
/// Token value for an indent.
|
||||
Indent,
|
||||
|
@ -223,6 +225,7 @@ impl fmt::Display for Tok {
|
|||
write!(f, "{kind}{quotes}{value}{quotes}")
|
||||
}
|
||||
Newline => f.write_str("Newline"),
|
||||
#[cfg(feature = "full-lexer")]
|
||||
NonLogicalNewline => f.write_str("NonLogicalNewline"),
|
||||
Indent => f.write_str("Indent"),
|
||||
Dedent => f.write_str("Dedent"),
|
||||
|
@ -236,6 +239,7 @@ impl fmt::Display for Tok {
|
|||
Rsqb => f.write_str("']'"),
|
||||
Colon => f.write_str("':'"),
|
||||
Comma => f.write_str("','"),
|
||||
#[cfg(feature = "full-lexer")]
|
||||
Comment(value) => f.write_str(value),
|
||||
Semi => f.write_str("';'"),
|
||||
Plus => f.write_str("'+'"),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue