From b2303029471fa74124900d957d1f9d3a1adbdbe5 Mon Sep 17 00:00:00 2001 From: Zsolt Dollenstein Date: Sat, 27 May 2023 19:33:20 +0100 Subject: [PATCH] Fix tokenizing `0else` This is an obscure one. `_ if 0else _` failed to parse with some very weird errors. It turns out that the tokenizer tries to parse `0else` as a single number, but when it encounters `l` it realizes it can't be a single number and it backtracks. Unfortunately the backtracking logic was broken, and it failed to correctly backtrack one of the offsets used for whitespace parsing (the byte offset since the start of the line). This caused whitespace nodes to refer to incorrect parts of the input text, eventually resulting in the above behavior. This PR fixes the bookkeeping when the tokenizer backtracks. Reported in #930. --- native/libcst/src/tokenizer/text_position/mod.rs | 5 +++++ native/libcst/tests/fixtures/expr.py | 1 + 2 files changed, 6 insertions(+) diff --git a/native/libcst/src/tokenizer/text_position/mod.rs b/native/libcst/src/tokenizer/text_position/mod.rs index 9c394d52..2e58600a 100644 --- a/native/libcst/src/tokenizer/text_position/mod.rs +++ b/native/libcst/src/tokenizer/text_position/mod.rs @@ -117,6 +117,10 @@ impl<'t> TextPosition<'t> { .inner_char_column_number .checked_sub(1) .expect("cannot back up past the beginning of a line."); + self.inner_byte_column_number = self + .inner_byte_column_number + .checked_sub(cw.byte_width) + .expect("cannot back up past the beginning of a line."); self.inner_byte_idx -= cw.byte_width; } else { panic!("Tried to backup past the beginning of the text.") @@ -217,6 +221,7 @@ impl fmt::Debug for TextPosition<'_> { .field("char_widths", &EllipsisDebug) .field("inner_byte_idx", &self.inner_byte_idx) .field("inner_char_column_number", &self.inner_char_column_number) + .field("inner_byte_column_number", &self.inner_byte_column_number) .field("inner_line_number", &self.inner_line_number) .finish() } diff --git a/native/libcst/tests/fixtures/expr.py b/native/libcst/tests/fixtures/expr.py index c1c4e9b7..abb78ab9 100644 --- a/native/libcst/tests/fixtures/expr.py +++ b/native/libcst/tests/fixtures/expr.py @@ -44,6 +44,7 @@ lambda a, b, c=True, *vararg, d=(v1 << 2), e='str', **kwargs : a + b manylambdas = lambda x=lambda y=lambda z=1: z: y(): x() foo = (lambda port_id, ignore_missing: {"port1": port1_resource, "port2": port2_resource}[port_id]) 1 if True else 2 +_ if 0else _ str or None if True else str or bytes or None (str or None) if True else (str or bytes or None) str or None if (1 if True else 2) else str or bytes or None