mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-08 05:35:22 +00:00
Add NonLogicalNewline token
This token is completely ignored by the parser, but it's useful for other users of the lexer, such as the Ruff linter. For example, the token is helpful for a "trailing comma" lint. The same idea exists in Python's `tokenize` module — there is a NEWLINE token (logical newline) and an NL token (non-logical newline). Fixes #4385.
This commit is contained in:
parent
fea57bcb04
commit
5cc208cc43
3 changed files with 84 additions and 3 deletions
|
@ -1075,10 +1075,13 @@ where
|
|||
self.next_char();
|
||||
let tok_end = self.get_pos();
|
||||
|
||||
// Depending on the nesting level, we emit newline or not:
|
||||
// Depending on the nesting level, we emit a logical or
|
||||
// non-logical newline:
|
||||
if self.nesting == 0 {
|
||||
self.at_begin_of_line = true;
|
||||
self.emit((tok_start, Tok::Newline, tok_end));
|
||||
} else {
|
||||
self.emit((tok_start, Tok::NonLogicalNewline, tok_end));
|
||||
}
|
||||
}
|
||||
' ' | '\t' | '\x0C' => {
|
||||
|
@ -1464,7 +1467,16 @@ mod tests {
|
|||
$(
|
||||
#[test]
|
||||
fn $name() {
|
||||
let source = format!("x = [{} 1,2{}]{}", $eol, $eol, $eol);
|
||||
let source = r"x = [
|
||||
|
||||
1,2
|
||||
,(3,
|
||||
4,
|
||||
), {
|
||||
5,
|
||||
6,\
|
||||
7}]
|
||||
".replace("\n", $eol);
|
||||
let tokens = lex_source(&source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
|
@ -1474,9 +1486,32 @@ mod tests {
|
|||
},
|
||||
Tok::Equal,
|
||||
Tok::Lsqb,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(1) },
|
||||
Tok::Comma,
|
||||
Tok::Int { value: BigInt::from(2) },
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Comma,
|
||||
Tok::Lpar,
|
||||
Tok::Int { value: BigInt::from(3) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(4) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Rpar,
|
||||
Tok::Comma,
|
||||
Tok::Lbrace,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(5) },
|
||||
Tok::Comma,
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Int { value: BigInt::from(6) },
|
||||
Tok::Comma,
|
||||
// Continuation here - no NonLogicalNewline.
|
||||
Tok::Int { value: BigInt::from(7) },
|
||||
Tok::Rbrace,
|
||||
Tok::Rsqb,
|
||||
Tok::Newline,
|
||||
]
|
||||
|
@ -1492,6 +1527,50 @@ mod tests {
|
|||
test_newline_in_brackets_unix_eol: UNIX_EOL,
|
||||
}
|
||||
|
||||
// Checks the token stream for string literals spread over several physical
// lines inside parentheses. The source is a raw string, so the `\` after 'c'
// is a literal backslash (a line continuation in the lexed input), not a Rust
// escape. The expected vector shows a NonLogicalNewline for each line break
// inside the parentheses, none for the backslash-continued line, and a final
// Newline even though the source literal has no trailing line break.
#[test]
|
||||
fn test_non_logical_newline_in_string_continuation() {
|
||||
let source = r"(
|
||||
'a'
|
||||
'b'
|
||||
|
||||
'c' \
|
||||
'd'
|
||||
)";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Lpar,
|
||||
Tok::NonLogicalNewline,
|
||||
stok("a"),
|
||||
Tok::NonLogicalNewline,
|
||||
stok("b"),
|
||||
|
||||
// Two in a row: the end of the 'b' line plus the blank line after it.
Tok::NonLogicalNewline,
|
||||
Tok::NonLogicalNewline,
|
||||
stok("c"),
|
||||
// Backslash continuation after 'c': no newline token of either kind.
stok("d"),
|
||||
Tok::NonLogicalNewline,
|
||||
Tok::Rpar,
|
||||
Tok::Newline,
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
// Lexes two comment-only lines separated by "\n", outside any brackets. The
// expected token stream contains just the two `Tok::Comment` tokens: no
// newline token of either kind is asserted between or after them.
#[test]
|
||||
fn test_logical_newline_line_comment() {
|
||||
let source = "#Hello\n#World";
|
||||
let tokens = lex_source(source);
|
||||
assert_eq!(
|
||||
tokens,
|
||||
vec![
|
||||
Tok::Comment("#Hello".to_owned()),
|
||||
// Python's tokenize.py does put an NL token here...
|
||||
Tok::Comment("#World".to_owned()),
|
||||
// ... and here, but that doesn't seem very useful, so none is expected.
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_operators() {
|
||||
let source = "//////=/ /";
|
||||
|
|
|
@ -96,7 +96,7 @@ pub fn parse_located(
|
|||
let marker_token = (Default::default(), mode.to_marker(), Default::default());
|
||||
let tokenizer = iter::once(Ok(marker_token))
|
||||
.chain(lxr)
|
||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. }));
|
||||
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
|
||||
|
||||
python::TopParser::new()
|
||||
.parse(tokenizer)
|
||||
|
|
|
@ -25,6 +25,7 @@ pub enum Tok {
|
|||
triple_quoted: bool,
|
||||
},
|
||||
Newline,
|
||||
NonLogicalNewline,
|
||||
Indent,
|
||||
Dedent,
|
||||
StartModule,
|
||||
|
@ -136,6 +137,7 @@ impl fmt::Display for Tok {
|
|||
write!(f, "{kind}{quotes}{value}{quotes}")
|
||||
}
|
||||
Newline => f.write_str("Newline"),
|
||||
NonLogicalNewline => f.write_str("NonLogicalNewline"),
|
||||
Indent => f.write_str("Indent"),
|
||||
Dedent => f.write_str("Dedent"),
|
||||
StartModule => f.write_str("StartProgram"),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue