Track synthetic tokens, to be able to remove them again later

Florian Diebold 2022-02-08 18:13:18 +01:00
parent 1b5cd03a37
commit 1a5aa84e9f
6 changed files with 133 additions and 39 deletions
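The change in a nutshell: previously, reverse_fixups recognized synthetic tokens by their text, dropping any identifier spelled __ra_fixup. Text-based removal only works for tokens with a unique spelling, so a synthetic ";" could never be told apart from a real one. This commit gives every synthetic token a SyntheticTokenId, records it in the TokenMap when the token is allocated an id (TokenIdAlloc::alloc), and makes reverse_fixups consult that map instead of looking at token text. Below is a minimal, self-contained sketch of the mechanism; all types are simplified stand-ins for the real tt/mbe types, not rust-analyzer's actual definitions.

    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
    struct TokenId(u32);

    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
    struct SyntheticTokenId(u32);

    #[derive(Debug)]
    struct Leaf {
        id: TokenId,
        text: String,
    }

    #[derive(Debug)]
    enum TokenTree {
        Leaf(Leaf),
        Subtree(Vec<TokenTree>),
    }

    #[derive(Debug, Default)]
    struct TokenMap {
        // token id -> synthetic id, recorded only for tokens that fixup invented
        synthetic_entries: Vec<(TokenId, SyntheticTokenId)>,
    }

    impl TokenMap {
        fn insert_synthetic(&mut self, token_id: TokenId, id: SyntheticTokenId) {
            self.synthetic_entries.push((token_id, id));
        }

        fn synthetic_token_id(&self, token_id: TokenId) -> Option<SyntheticTokenId> {
            self.synthetic_entries.iter().find(|(tid, _)| *tid == token_id).map(|(_, id)| *id)
        }
    }

    // Drop exactly the leaves the map marks as synthetic, recursing into
    // subtrees; this mirrors the shape of the new `reverse_fixups`.
    fn reverse_fixups(tts: &mut Vec<TokenTree>, token_map: &TokenMap) {
        tts.retain(|tt| match tt {
            TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id).is_none(),
            TokenTree::Subtree(_) => true,
        });
        for tt in tts.iter_mut() {
            if let TokenTree::Subtree(sub) = tt {
                reverse_fixups(sub, token_map);
            }
        }
    }

    fn main() {
        let mut map = TokenMap::default();
        // Pretend token 1 (an appended `__ra_fixup`) was synthesized during fixup.
        map.insert_synthetic(TokenId(1), SyntheticTokenId(0));

        let mut tts = vec![
            TokenTree::Leaf(Leaf { id: TokenId(0), text: "a".into() }),
            TokenTree::Leaf(Leaf { id: TokenId(1), text: "__ra_fixup".into() }),
        ];
        reverse_fixups(&mut tts, &map);

        assert_eq!(tts.len(), 1);
        if let TokenTree::Leaf(leaf) = &tts[0] {
            assert_eq!(leaf.text, "a"); // only the real token survives
        }
    }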

crates/hir_expand/src/db.rs

@@ -5,8 +5,8 @@ use std::sync::Arc;
 use base_db::{salsa, SourceDatabase};
 use either::Either;
 use limit::Limit;
-use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult, SyntheticToken};
-use rustc_hash::{FxHashMap, FxHashSet};
+use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult};
+use rustc_hash::FxHashSet;
 use syntax::{
     algo::diff,
     ast::{self, HasAttrs, HasDocComments},
@@ -442,7 +442,7 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
         ));
     }
 
-    fixup::reverse_fixups(&mut tt);
+    fixup::reverse_fixups(&mut tt, &macro_arg.1);
 
     ExpandResult { value: Some(Arc::new(tt)), err }
 }

crates/hir_expand/src/fixup.rs

@@ -1,10 +1,10 @@
-use mbe::SyntheticToken;
+use mbe::{SyntheticToken, SyntheticTokenId, TokenMap};
 use rustc_hash::FxHashMap;
 use syntax::{
     ast::{self, AstNode},
-    match_ast, SyntaxKind, SyntaxNode, SyntaxToken,
+    match_ast, SyntaxKind, SyntaxNode, TextRange,
 };
-use tt::{Leaf, Subtree};
+use tt::Subtree;
 
 #[derive(Debug)]
 pub struct SyntaxFixups {
@@ -16,6 +16,7 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
     let mut append = FxHashMap::default();
     let mut replace = FxHashMap::default();
     let mut preorder = node.preorder();
+    let empty_id = SyntheticTokenId(0);
     while let Some(event) = preorder.next() {
         let node = match event {
             syntax::WalkEvent::Enter(node) => node,
@@ -27,12 +28,32 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
             preorder.skip_subtree();
             continue;
         }
+        let end_range = TextRange::empty(node.text_range().end());
         match_ast! {
             match node {
                 ast::FieldExpr(it) => {
                     if it.name_ref().is_none() {
                         // incomplete field access: some_expr.|
-                        append.insert(node.clone(), vec![(SyntaxKind::IDENT, "__ra_fixup".into())]);
+                        append.insert(node.clone(), vec![
+                            SyntheticToken {
+                                kind: SyntaxKind::IDENT,
+                                text: "__ra_fixup".into(),
+                                range: end_range,
+                                id: empty_id,
+                            },
+                        ]);
                     }
                 },
+                ast::ExprStmt(it) => {
+                    if it.semicolon_token().is_none() {
+                        append.insert(node.clone(), vec![
+                            SyntheticToken {
+                                kind: SyntaxKind::SEMICOLON,
+                                text: ";".into(),
+                                range: end_range,
+                                id: empty_id,
+                            },
+                        ]);
+                    }
+                },
                 _ => (),
@@ -42,20 +63,21 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
     SyntaxFixups { append, replace }
 }
 
-pub fn reverse_fixups(tt: &mut Subtree) {
+pub fn reverse_fixups(tt: &mut Subtree, token_map: &TokenMap) {
+    eprintln!("token_map: {:?}", token_map);
     tt.token_trees.retain(|tt| match tt {
-        tt::TokenTree::Leaf(Leaf::Ident(ident)) => ident.text != "__ra_fixup",
+        tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()).is_none(),
         _ => true,
     });
     tt.token_trees.iter_mut().for_each(|tt| match tt {
-        tt::TokenTree::Subtree(tt) => reverse_fixups(tt),
+        tt::TokenTree::Subtree(tt) => reverse_fixups(tt, token_map),
         _ => {}
     });
 }
 
 #[cfg(test)]
 mod tests {
-    use expect_test::{Expect, expect};
+    use expect_test::{expect, Expect};
 
     use super::reverse_fixups;
@@ -63,7 +85,7 @@ mod tests {
     fn check(ra_fixture: &str, mut expect: Expect) {
         let parsed = syntax::SourceFile::parse(ra_fixture);
         let fixups = super::fixup_syntax(&parsed.syntax_node());
-        let (mut tt, _tmap) = mbe::syntax_node_to_token_tree_censored(
+        let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(
             &parsed.syntax_node(),
             fixups.replace,
             fixups.append,
@@ -77,9 +99,14 @@ mod tests {
         // the fixed-up tree should be syntactically valid
         let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
-        assert_eq!(parse.errors(), &[], "parse has syntax errors. parse tree:\n{:#?}", parse.syntax_node());
+        assert_eq!(
+            parse.errors(),
+            &[],
+            "parse has syntax errors. parse tree:\n{:#?}",
+            parse.syntax_node()
+        );
 
-        reverse_fixups(&mut tt);
+        reverse_fixups(&mut tt, &tmap);
 
         // the fixed-up + reversed version should be equivalent to the original input
         // (but token IDs don't matter)
@@ -89,48 +116,60 @@ mod tests {
     #[test]
     fn incomplete_field_expr_1() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a.
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup}
-"#]])
+"#]],
+        )
     }
 
     #[test]
     fn incomplete_field_expr_2() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a. ;
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup ;}
-"#]])
+"#]],
+        )
     }
 
     #[test]
    fn incomplete_field_expr_3() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a. ;
     bar();
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup ; bar () ;}
-"#]])
+"#]],
+        )
     }
 
     #[test]
     fn field_expr_before_call() {
         // another case that easily happens while typing
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a.b
     bar();
 }
-"#, expect![[r#"
-fn foo () {a . b bar () ;}
-"#]])
+"#,
+            expect![[r#"
+fn foo () {a . b ; bar () ;}
+"#]],
+        )
     }
 }
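Note the updated expectation in field_expr_before_call: the expect strings show the fixed-up token tree (the __ra_fixup identifiers are still present, so this is before reverse_fixups runs), and with the new ast::ExprStmt branch an expression statement missing its semicolon now gets a synthetic ";" appended. That is why `a.b` followed by `bar();` fixes up to `a . b ; bar () ;` instead of `a . b bar () ;`. A toy version of the check round trip, with plain structs as hypothetical stand-ins for token trees:

    #[derive(Clone, Debug, PartialEq, Eq)]
    struct Token {
        text: String,
        synthetic: bool,
    }

    fn tok(text: &str) -> Token {
        Token { text: text.into(), synthetic: false }
    }

    // "Fixup": append a synthetic `;` right after the token at `idx`, the way
    // the new ast::ExprStmt branch appends one to an unterminated statement.
    fn append_synthetic_semi(tokens: &mut Vec<Token>, idx: usize) {
        tokens.insert(idx + 1, Token { text: ";".into(), synthetic: true });
    }

    // "Reverse": drop exactly the synthetic tokens, keeping every real one.
    fn reverse_fixups(tokens: &mut Vec<Token>) {
        tokens.retain(|t| !t.synthetic);
    }

    fn main() {
        // fn foo() { a.b  bar(); } reduced to the interesting tokens
        let original =
            vec![tok("a"), tok("."), tok("b"), tok("bar"), tok("("), tok(")"), tok(";")];

        let mut fixed = original.clone();
        append_synthetic_semi(&mut fixed, 2); // a . b ; bar ( ) ;
        assert_eq!(fixed[3].text, ";"); // the parser now sees two statements

        reverse_fixups(&mut fixed);
        assert_eq!(fixed, original); // the round trip restores the input
    }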

crates/mbe/src/lib.rs

@@ -31,6 +31,7 @@ pub use crate::{
     syntax_bridge::{
         parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
         syntax_node_to_token_tree_censored, token_tree_to_syntax_node, SyntheticToken,
+        SyntheticTokenId,
     },
     token_map::TokenMap,
 };

crates/mbe/src/syntax_bridge.rs

@@ -1,6 +1,6 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
 
-use rustc_hash::{FxHashMap, FxHashSet};
+use rustc_hash::FxHashMap;
 use stdx::{always, non_empty_vec::NonEmptyVec};
 use syntax::{
     ast::{self, make::tokens::doc_comment},
@@ -35,7 +35,16 @@ pub fn syntax_node_to_token_tree_censored(
     (subtree, c.id_alloc.map)
 }
 
-pub type SyntheticToken = (SyntaxKind, SmolStr);
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct SyntheticTokenId(pub u32);
+
+#[derive(Debug, Clone)]
+pub struct SyntheticToken {
+    pub kind: SyntaxKind,
+    pub text: SmolStr,
+    pub range: TextRange,
+    pub id: SyntheticTokenId,
+}
 
 // The following items are what `rustc` macro can be parsed into :
 // link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
@@ -153,13 +162,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
             Some(it) => it,
             None => break,
         };
+        let synth_id = token.synthetic_id(&conv);
 
         let kind = token.kind(&conv);
         if kind == COMMENT {
             if let Some(tokens) = conv.convert_doc_comment(&token) {
                 // FIXME: There has to be a better way to do this
                 // Add the comments token id to the converted doc string
-                let id = conv.id_alloc().alloc(range);
+                let id = conv.id_alloc().alloc(range, synth_id);
                 result.extend(tokens.into_iter().map(|mut tt| {
                     if let tt::TokenTree::Subtree(sub) = &mut tt {
                         if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(lit))) =
@@ -174,7 +184,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
             continue;
         }
         let tt = if kind.is_punct() && kind != UNDERSCORE {
-            assert_eq!(range.len(), TextSize::of('.'));
+            // assert_eq!(range.len(), TextSize::of('.'));
 
             if let Some(delim) = subtree.delimiter {
                 let expected = match delim.kind {
@@ -226,11 +236,13 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
                     panic!("Token from lexer must be single char: token = {:#?}", token);
                 }
             };
-            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
+            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range, synth_id) })
+                .into()
         } else {
             macro_rules! make_leaf {
                 ($i:ident) => {
-                    tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into()
+                    tt::$i { id: conv.id_alloc().alloc(range, synth_id), text: token.to_text(conv) }
+                        .into()
                 };
             }
             let leaf: tt::Leaf = match kind {
@@ -245,14 +257,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
                 let apostrophe = tt::Leaf::from(tt::Punct {
                     char: '\'',
                     spacing: tt::Spacing::Joint,
-                    id: conv.id_alloc().alloc(r),
+                    id: conv.id_alloc().alloc(r, synth_id),
                 });
                 result.push(apostrophe.into());
 
                 let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                 let ident = tt::Leaf::from(tt::Ident {
                     text: SmolStr::new(&token.to_text(conv)[1..]),
-                    id: conv.id_alloc().alloc(r),
+                    id: conv.id_alloc().alloc(r, synth_id),
                 });
                 result.push(ident.into());
                 continue;
@@ -273,7 +285,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
         conv.id_alloc().close_delim(entry.idx, None);
         let leaf: tt::Leaf = tt::Punct {
-            id: conv.id_alloc().alloc(entry.open_range),
+            id: conv.id_alloc().alloc(entry.open_range, None),
             char: match entry.subtree.delimiter.unwrap().kind {
                 tt::DelimiterKind::Parenthesis => '(',
                 tt::DelimiterKind::Brace => '{',
@@ -367,11 +379,18 @@ struct TokenIdAlloc {
 }
 
 impl TokenIdAlloc {
-    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
+    fn alloc(
+        &mut self,
+        absolute_range: TextRange,
+        synthetic_id: Option<SyntheticTokenId>,
+    ) -> tt::TokenId {
         let relative_range = absolute_range - self.global_offset;
         let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        self.map.insert(token_id, relative_range);
+        if let Some(id) = synthetic_id {
+            self.map.insert_synthetic(token_id, id);
+        }
         token_id
     }
@@ -411,6 +430,8 @@ trait SrcToken<Ctx>: std::fmt::Debug {
     fn to_char(&self, ctx: &Ctx) -> Option<char>;
     fn to_text(&self, ctx: &Ctx) -> SmolStr;
+
+    fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
 }
 
 trait TokenConvertor: Sized {
@@ -437,6 +458,10 @@ impl<'a> SrcToken<RawConvertor<'a>> for usize {
     fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
         ctx.lexed.text(*self).into()
     }
+
+    fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option<SyntheticTokenId> {
+        None
+    }
 }
 
 impl<'a> TokenConvertor for RawConvertor<'a> {
@@ -564,13 +589,14 @@ impl SrcToken<Convertor> for SynToken {
         match self {
             SynToken::Ordinary(token) => token.kind(),
             SynToken::Punch(token, _) => token.kind(),
-            SynToken::Synthetic((kind, _)) => *kind,
+            SynToken::Synthetic(token) => token.kind,
         }
     }
 
     fn to_char(&self, _ctx: &Convertor) -> Option<char> {
         match self {
             SynToken::Ordinary(_) => None,
             SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
+            SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
             SynToken::Synthetic(_) => None,
         }
     }
@@ -578,7 +604,14 @@ impl SrcToken<Convertor> for SynToken {
         match self {
             SynToken::Ordinary(token) => token.text().into(),
             SynToken::Punch(token, _) => token.text().into(),
-            SynToken::Synthetic((_, text)) => text.clone(),
+            SynToken::Synthetic(token) => token.text.clone(),
         }
     }
+
+    fn synthetic_id(&self, _ctx: &Convertor) -> Option<SyntheticTokenId> {
+        match self {
+            SynToken::Synthetic(token) => Some(token.id),
+            _ => None,
+        }
+    }
 }
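The pattern above threads one question through every token source: is this token synthetic, and if so, which one? The lexer-based RawConvertor always answers None, while the tree-based Convertor reports the id stored on SynToken::Synthetic. A reduced sketch of that hook, using simplified hypothetical types rather than the real trait signatures:

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct SyntheticTokenId(u32);

    // The question every token source must now answer.
    trait SrcToken {
        fn synthetic_id(&self) -> Option<SyntheticTokenId>;
    }

    // Tokens straight from the lexer: never synthetic.
    struct RawToken;

    impl SrcToken for RawToken {
        fn synthetic_id(&self) -> Option<SyntheticTokenId> {
            None
        }
    }

    // Tokens from walking a syntax tree, where fixup may have injected some.
    enum SynToken {
        Ordinary,
        Synthetic(SyntheticTokenId),
    }

    impl SrcToken for SynToken {
        fn synthetic_id(&self) -> Option<SyntheticTokenId> {
            match self {
                SynToken::Synthetic(id) => Some(*id),
                _ => None,
            }
        }
    }

    fn main() {
        assert_eq!(RawToken.synthetic_id(), None);
        assert_eq!(SynToken::Ordinary.synthetic_id(), None);
        assert_eq!(
            SynToken::Synthetic(SyntheticTokenId(0)).synthetic_id(),
            Some(SyntheticTokenId(0))
        );
    }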

crates/mbe/src/token_map.rs

@@ -5,6 +5,8 @@ use std::hash::Hash;
 use parser::{SyntaxKind, T};
 use syntax::{TextRange, TextSize};
 
+use crate::syntax_bridge::SyntheticTokenId;
+
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
 enum TokenTextRange {
     Token(TextRange),
@@ -31,6 +33,7 @@ impl TokenTextRange {
 pub struct TokenMap {
     /// Maps `tt::TokenId` to the *relative* source range.
     entries: Vec<(tt::TokenId, TokenTextRange)>,
+    pub synthetic_entries: Vec<(tt::TokenId, SyntheticTokenId)>,
 }
 
 impl TokenMap {
@@ -57,6 +60,10 @@ impl TokenMap {
             .filter_map(move |(_, range)| range.by_kind(kind))
     }
 
+    pub fn synthetic_token_id(&self, token_id: tt::TokenId) -> Option<SyntheticTokenId> {
+        self.synthetic_entries.iter().find(|(tid, _)| *tid == token_id).map(|(_, id)| *id)
+    }
+
     pub fn first_range_by_token(
         &self,
         token_id: tt::TokenId,
@@ -73,6 +80,10 @@ impl TokenMap {
         self.entries.push((token_id, TokenTextRange::Token(relative_range)));
     }
 
+    pub(crate) fn insert_synthetic(&mut self, token_id: tt::TokenId, id: SyntheticTokenId) {
+        self.synthetic_entries.push((token_id, id));
+    }
+
     pub(crate) fn insert_delim(
         &mut self,
         token_id: tt::TokenId,
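synthetic_entries mirrors the existing entries field: a flat Vec of pairs with a linear-scan lookup, which is cheap for the handful of synthetic tokens a single expansion produces and keeps insertion allocation-light. If lookups ever dominated, the same two methods could sit on a hash map instead; a hypothetical sketch with stand-in types, not the real TokenMap:

    use std::collections::HashMap;

    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
    struct TokenId(u32);

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct SyntheticTokenId(u32);

    #[derive(Default)]
    struct TokenMap {
        synthetic_entries: HashMap<TokenId, SyntheticTokenId>,
    }

    impl TokenMap {
        fn insert_synthetic(&mut self, token_id: TokenId, id: SyntheticTokenId) {
            self.synthetic_entries.insert(token_id, id);
        }

        // O(1) average-case lookup instead of the Vec's linear scan.
        fn synthetic_token_id(&self, token_id: TokenId) -> Option<SyntheticTokenId> {
            self.synthetic_entries.get(&token_id).copied()
        }
    }

    fn main() {
        let mut map = TokenMap::default();
        map.insert_synthetic(TokenId(7), SyntheticTokenId(0));
        assert_eq!(map.synthetic_token_id(TokenId(7)), Some(SyntheticTokenId(0)));
        assert_eq!(map.synthetic_token_id(TokenId(8)), None); // real tokens stay unmapped
    }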

crates/tt/src/lib.rs

@@ -87,6 +87,16 @@ pub struct Ident {
     pub id: TokenId,
 }
 
+impl Leaf {
+    pub fn id(&self) -> TokenId {
+        match self {
+            Leaf::Literal(l) => l.id,
+            Leaf::Punct(p) => p.id,
+            Leaf::Ident(i) => i.id,
+        }
+    }
+}
+
 fn print_debug_subtree(f: &mut fmt::Formatter<'_>, subtree: &Subtree, level: usize) -> fmt::Result {
     let align = "  ".repeat(level);
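Leaf::id exists so that callers such as the new reverse_fixups can read a leaf's token id without matching on the variant themselves. A self-contained sketch of the accessor pattern, with simplified stand-in types:

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    struct TokenId(u32);

    struct Literal { id: TokenId }
    struct Punct { id: TokenId }
    struct Ident { id: TokenId }

    enum Leaf {
        Literal(Literal),
        Punct(Punct),
        Ident(Ident),
    }

    impl Leaf {
        // Project the shared `id` field out of whichever variant this is.
        fn id(&self) -> TokenId {
            match self {
                Leaf::Literal(l) => l.id,
                Leaf::Punct(p) => p.id,
                Leaf::Ident(i) => i.id,
            }
        }
    }

    fn main() {
        assert_eq!(Leaf::Ident(Ident { id: TokenId(42) }).id(), TokenId(42));
        assert_eq!(Leaf::Punct(Punct { id: TokenId(7) }).id(), TokenId(7));
        assert_eq!(Leaf::Literal(Literal { id: TokenId(1) }).id(), TokenId(1));
    }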