Make edition per-token, not per-file

More precisely, *also* per-token: as it turns out, while the top-level edition affects parsing (I think), the per-token edition affects the escaping of identifiers/keywords.
This commit is contained in:
Chayim Refael Friedman 2025-01-07 08:00:18 +02:00
parent 3f2bbe9fed
commit 97afb7bfba
34 changed files with 480 additions and 316 deletions

View file

@ -1,6 +1,6 @@
//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
use std::fmt;
use std::{fmt, hash::Hash};
use intern::Symbol;
use rustc_hash::{FxHashMap, FxHashSet};
@ -58,7 +58,7 @@ pub mod dummy_test_span_utils {
),
ast_id: span::ROOT_ERASED_FILE_AST_ID,
},
ctx: SyntaxContextId::ROOT,
ctx: SyntaxContextId::root(Edition::CURRENT),
};
pub struct DummyTestSpanMap;
@ -74,7 +74,7 @@ pub mod dummy_test_span_utils {
),
ast_id: span::ROOT_ERASED_FILE_AST_ID,
},
ctx: SyntaxContextId::ROOT,
ctx: SyntaxContextId::root(Edition::CURRENT),
}
}
}
@ -141,15 +141,16 @@ where
pub fn token_tree_to_syntax_node<Ctx>(
tt: &tt::TopSubtree<SpanData<Ctx>>,
entry_point: parser::TopEntryPoint,
edition: parser::Edition,
span_to_edition: &mut dyn FnMut(Ctx) -> Edition,
top_edition: Edition,
) -> (Parse<SyntaxNode>, SpanMap<Ctx>)
where
SpanData<Ctx>: Copy + fmt::Debug,
Ctx: PartialEq,
Ctx: Copy + fmt::Debug + PartialEq + PartialEq + Eq + Hash,
{
let buffer = tt.view().strip_invisible();
let parser_input = to_parser_input(edition, buffer);
let parser_output = entry_point.parse(&parser_input, edition);
let parser_input = to_parser_input(buffer, span_to_edition);
// It matters what edition we parse with even when we escape all identifiers correctly.
let parser_output = entry_point.parse(&parser_input, top_edition);
let mut tree_sink = TtTreeSink::new(buffer.cursor());
for event in parser_output.iter() {
match event {

View file

@ -2,17 +2,20 @@
//! format that works for our parser.
use std::fmt;
use std::hash::Hash;
use span::Edition;
use rustc_hash::FxHashMap;
use span::{Edition, SpanData};
use syntax::{SyntaxKind, SyntaxKind::*, T};
pub fn to_parser_input<S: Copy + fmt::Debug>(
edition: Edition,
buffer: tt::TokenTreesView<'_, S>,
pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
buffer: tt::TokenTreesView<'_, SpanData<Ctx>>,
span_to_edition: &mut dyn FnMut(Ctx) -> Edition,
) -> parser::Input {
let mut res = parser::Input::default();
let mut current = buffer.cursor();
let mut syntax_context_to_edition_cache = FxHashMap::default();
while !current.eof() {
let tt = current.token_tree();
@ -57,20 +60,25 @@ pub fn to_parser_input<S: Copy + fmt::Debug>(
res.was_joint();
}
}
tt::Leaf::Ident(ident) => match ident.sym.as_str() {
"_" => res.push(T![_]),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
_ if ident.is_raw.yes() => res.push(IDENT),
text => match SyntaxKind::from_keyword(text, edition) {
Some(kind) => res.push(kind),
None => {
let contextual_keyword =
SyntaxKind::from_contextual_keyword(text, edition)
.unwrap_or(SyntaxKind::IDENT);
res.push_ident(contextual_keyword);
}
},
},
tt::Leaf::Ident(ident) => {
let edition = *syntax_context_to_edition_cache
.entry(ident.span.ctx)
.or_insert_with(|| span_to_edition(ident.span.ctx));
match ident.sym.as_str() {
"_" => res.push(T![_]),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
_ if ident.is_raw.yes() => res.push(IDENT),
text => match SyntaxKind::from_keyword(text, edition) {
Some(kind) => res.push(kind),
None => {
let contextual_keyword =
SyntaxKind::from_contextual_keyword(text, edition)
.unwrap_or(SyntaxKind::IDENT);
res.push_ident(contextual_keyword);
}
},
}
}
tt::Leaf::Punct(punct) => {
let kind = SyntaxKind::from_char(punct.char)
.unwrap_or_else(|| panic!("{punct:#?} is not a valid punct"));