11444: feat: Fix up syntax errors in attribute macro inputs to make completion work more often r=flodiebold a=flodiebold

This implements the "fix up syntax nodes" workaround mentioned in #11014. It isn't much more than a proof of concept; I have only implemented a few cases, but it already helps quite a bit.

Some notes:
 - I'm not super happy about how much the fixup procedure needs to interact with the syntax node -> token tree conversion code (e.g. needing to share the token map). This could maybe be simplified with some refactoring of that code.
 - It would maybe be nice to have the fixup procedure reuse or share information with the parser, though I'm not really sure how much that would actually help.

Co-authored-by: Florian Diebold <flodiebold@gmail.com>
This commit is contained in:
bors[bot] 2022-02-12 12:48:46 +00:00 committed by GitHub
commit 7a17fb9c43
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 646 additions and 95 deletions

View file

@ -14,7 +14,7 @@ use syntax::{
};
use crate::{
ast_id_map::AstIdMap, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
ast_id_map::AstIdMap, fixup, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
BuiltinFnLikeExpander, ExpandTo, HirFileId, HirFileIdRepr, MacroCallId, MacroCallKind,
MacroCallLoc, MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
};
@ -108,7 +108,10 @@ pub trait AstDatabase: SourceDatabase {
/// Lowers syntactic macro call to a token tree representation.
#[salsa::transparent]
fn macro_arg(&self, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>>;
fn macro_arg(
&self,
id: MacroCallId,
) -> Option<Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>>;
/// Extracts syntax node, corresponding to a macro call. That's a firewall
/// query, only typing in the macro call itself changes the returned
/// subtree.
@ -146,8 +149,15 @@ pub fn expand_speculative(
// Build the subtree and token mapping for the speculative args
let censor = censor_for_macro_input(&loc, &speculative_args);
let (mut tt, spec_args_tmap) =
mbe::syntax_node_to_token_tree_censored(&speculative_args, &censor);
let mut fixups = fixup::fixup_syntax(&speculative_args);
fixups.replace.extend(censor.into_iter().map(|node| (node, Vec::new())));
let (mut tt, spec_args_tmap, _) = mbe::syntax_node_to_token_tree_with_modifications(
&speculative_args,
fixups.token_map,
fixups.next_id,
fixups.replace,
fixups.append,
);
let (attr_arg, token_id) = match loc.kind {
MacroCallKind::Attr { invoc_attr_index, .. } => {
@ -194,7 +204,7 @@ pub fn expand_speculative(
// Do the actual expansion, we need to directly expand the proc macro due to the attribute args
// Otherwise the expand query will fetch the non speculative attribute args and pass those instead.
let speculative_expansion = if let MacroDefKind::ProcMacro(expander, ..) = loc.def.kind {
let mut speculative_expansion = if let MacroDefKind::ProcMacro(expander, ..) = loc.def.kind {
tt.delimiter = None;
expander.expand(db, loc.krate, &tt, attr_arg.as_ref())
} else {
@ -202,6 +212,7 @@ pub fn expand_speculative(
};
let expand_to = macro_expand_to(db, actual_macro_call);
fixup::reverse_fixups(&mut speculative_expansion.value, &spec_args_tmap, &fixups.undo_info);
let (node, rev_tmap) = token_tree_to_syntax_node(&speculative_expansion.value, expand_to);
let range = rev_tmap.first_range_by_token(token_id, token_to_map.kind())?;
@ -289,20 +300,31 @@ fn parse_macro_expansion(
}
}
fn macro_arg(db: &dyn AstDatabase, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>> {
fn macro_arg(
db: &dyn AstDatabase,
id: MacroCallId,
) -> Option<Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>> {
let arg = db.macro_arg_text(id)?;
let loc = db.lookup_intern_macro_call(id);
let node = SyntaxNode::new_root(arg);
let censor = censor_for_macro_input(&loc, &node);
let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(&node, &censor);
let mut fixups = fixup::fixup_syntax(&node);
fixups.replace.extend(censor.into_iter().map(|node| (node, Vec::new())));
let (mut tt, tmap, _) = mbe::syntax_node_to_token_tree_with_modifications(
&node,
fixups.token_map,
fixups.next_id,
fixups.replace,
fixups.append,
);
if loc.def.is_proc_macro() {
// proc macros expect their inputs without parentheses, MBEs expect it with them included
tt.delimiter = None;
}
Some(Arc::new((tt, tmap)))
Some(Arc::new((tt, tmap, fixups.undo_info)))
}
fn censor_for_macro_input(loc: &MacroCallLoc, node: &SyntaxNode) -> FxHashSet<SyntaxNode> {
@ -419,10 +441,9 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
// be reported at the definition site (when we construct a def map).
Err(err) => return ExpandResult::str_err(format!("invalid macro definition: {}", err)),
};
let ExpandResult { value: tt, err } = expander.expand(db, id, &macro_arg.0);
let ExpandResult { value: mut tt, err } = expander.expand(db, id, &macro_arg.0);
// Set a hard limit for the expanded tt
let count = tt.count();
// XXX: Make ExpandResult a real error and use .map_err instead?
if TOKEN_LIMIT.check(count).is_err() {
return ExpandResult::str_err(format!(
"macro invocation exceeds token limit: produced {} tokens, limit is {}",
@ -431,6 +452,8 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
));
}
fixup::reverse_fixups(&mut tt, &macro_arg.1, &macro_arg.2);
ExpandResult { value: Some(Arc::new(tt)), err }
}

View file

@ -0,0 +1,261 @@
//! To make attribute macros work reliably when typing, we need to take care to
//! fix up syntax errors in the code we're passing to them.
use std::mem;
use mbe::{SyntheticToken, SyntheticTokenId, TokenMap};
use rustc_hash::FxHashMap;
use syntax::{
ast::{self, AstNode},
match_ast, SyntaxKind, SyntaxNode, TextRange,
};
use tt::Subtree;
/// The result of calculating fixes for a syntax node -- a bunch of changes
/// (appending to and replacing nodes), the information that is needed to
/// reverse those changes afterwards, and a token map.
#[derive(Debug)]
pub(crate) struct SyntaxFixups {
/// Synthetic tokens to append for a given node, keyed by that node
/// (e.g. a placeholder name after an incomplete field access, or a missing `;`).
pub(crate) append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
/// Nodes that are replaced wholesale by the given synthetic tokens,
/// because they contain errors that cannot be fixed by appending.
pub(crate) replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
/// Everything needed to reverse the replacements afterwards.
pub(crate) undo_info: SyntaxFixupUndoInfo,
/// Token map accumulated while converting the replaced nodes to token trees.
pub(crate) token_map: TokenMap,
/// Id counter as returned by the last token tree conversion done while fixing up;
/// presumably the next unused token id -- confirm against `mbe`.
pub(crate) next_id: u32,
}
/// This is the information needed to reverse the fixups.
#[derive(Debug, PartialEq, Eq)]
pub struct SyntaxFixupUndoInfo {
/// Token trees of the nodes that were replaced during fixup. The synthetic
/// token id of each replacement token is the index of its original tree here.
original: Vec<Subtree>,
}
/// Synthetic token id given to appended tokens, which have no original
/// subtree to restore; `reverse_fixups` simply removes tokens with this id.
const EMPTY_ID: SyntheticTokenId = SyntheticTokenId(!0);
pub(crate) fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
let mut append = FxHashMap::default();
let mut replace = FxHashMap::default();
let mut preorder = node.preorder();
let mut original = Vec::new();
let mut token_map = TokenMap::default();
let mut next_id = 0;
while let Some(event) = preorder.next() {
let node = match event {
syntax::WalkEvent::Enter(node) => node,
syntax::WalkEvent::Leave(_) => continue,
};
if can_handle_error(&node) && has_error_to_handle(&node) {
// the node contains an error node, we have to completely replace it by something valid
let (original_tree, new_tmap, new_next_id) =
mbe::syntax_node_to_token_tree_with_modifications(
&node,
mem::take(&mut token_map),
next_id,
Default::default(),
Default::default(),
);
token_map = new_tmap;
next_id = new_next_id;
let idx = original.len() as u32;
original.push(original_tree);
let replacement = SyntheticToken {
kind: SyntaxKind::IDENT,
text: "__ra_fixup".into(),
range: node.text_range(),
id: SyntheticTokenId(idx),
};
replace.insert(node.clone(), vec![replacement]);
preorder.skip_subtree();
continue;
}
// In some other situations, we can fix things by just appending some tokens.
let end_range = TextRange::empty(node.text_range().end());
match_ast! {
match node {
ast::FieldExpr(it) => {
if it.name_ref().is_none() {
// incomplete field access: some_expr.|
append.insert(node.clone(), vec![
SyntheticToken {
kind: SyntaxKind::IDENT,
text: "__ra_fixup".into(),
range: end_range,
id: EMPTY_ID,
},
]);
}
},
ast::ExprStmt(it) => {
if it.semicolon_token().is_none() {
append.insert(node.clone(), vec![
SyntheticToken {
kind: SyntaxKind::SEMICOLON,
text: ";".into(),
range: end_range,
id: EMPTY_ID,
},
]);
}
},
_ => (),
}
}
}
SyntaxFixups {
append,
replace,
token_map,
next_id,
undo_info: SyntaxFixupUndoInfo { original },
}
}
fn has_error(node: &SyntaxNode) -> bool {
node.children().any(|c| c.kind() == SyntaxKind::ERROR)
}
/// Returns `true` if `node` is an expression, i.e. a node whose errors can be
/// fixed up by replacing the whole node.
fn can_handle_error(node: &SyntaxNode) -> bool {
    let kind = node.kind();
    ast::Expr::can_cast(kind)
}
/// Returns `true` if `node` has an error that must be dealt with at `node`'s
/// level: either a direct `ERROR` child, or such an error inside a child that
/// cannot handle errors itself (checked recursively).
fn has_error_to_handle(node: &SyntaxNode) -> bool {
    if has_error(node) {
        return true;
    }
    // Children that can handle errors are skipped here; the tree walk in
    // `fixup_syntax` visits them on their own.
    node.children().any(|child| !can_handle_error(&child) && has_error_to_handle(&child))
}
/// Undoes the changes described by a [`SyntaxFixups`] in the token tree `tt`.
///
/// `token_map` is used to find the synthetic token id (if any) of each leaf,
/// and `undo_info` holds the original token trees of the replaced nodes.
///
/// Works in two steps on every level of the tree:
///
/// 1. leaves with the synthetic id `EMPTY_ID` (appended tokens) are removed;
/// 2. every remaining leaf with a synthetic id is swapped for the original
///    subtree stored at that index, and nested subtrees are processed recursively.
///
/// Panics if a synthetic id has no corresponding entry in `undo_info`.
pub(crate) fn reverse_fixups(
    tt: &mut Subtree,
    token_map: &TokenMap,
    undo_info: &SyntaxFixupUndoInfo,
) {
    // Leaves without a synthetic id compare unequal to `Some(EMPTY_ID)` as well,
    // so a single lookup is enough to decide what to keep.
    tt.token_trees.retain(|tree| match tree {
        tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()) != Some(EMPTY_ID),
        tt::TokenTree::Subtree(_) => true,
    });
    for tree in &mut tt.token_trees {
        match tree {
            tt::TokenTree::Subtree(subtree) => reverse_fixups(subtree, token_map, undo_info),
            tt::TokenTree::Leaf(leaf) => {
                if let Some(id) = token_map.synthetic_token_id(leaf.id()) {
                    // The id of a replacement token is the index of its original tree.
                    let original = &undo_info.original[id.0 as usize];
                    *tree = tt::TokenTree::Subtree(original.clone());
                }
            }
        }
    }
}
// Snapshot tests for the fixup procedure: each test feeds a snippet with a
// syntax error through `check` and pins the fixed-up token tree.
#[cfg(test)]
mod tests {
use expect_test::{expect, Expect};
use super::reverse_fixups;
// Runs the whole fixup round trip on `ra_fixture`:
// 1. parses the fixture and computes the fixups for it,
// 2. converts the syntax tree to a token tree with the fixups applied and
//    compares its text to `expect`,
// 3. checks that the fixed-up token tree parses without errors,
// 4. reverses the fixups and checks that the result matches the token tree
//    of the unmodified input.
#[track_caller]
fn check(ra_fixture: &str, mut expect: Expect) {
let parsed = syntax::SourceFile::parse(ra_fixture);
let fixups = super::fixup_syntax(&parsed.syntax_node());
let (mut tt, tmap, _) = mbe::syntax_node_to_token_tree_with_modifications(
&parsed.syntax_node(),
fixups.token_map,
fixups.next_id,
fixups.replace,
fixups.append,
);
let mut actual = tt.to_string();
actual.push_str("\n");
expect.indent(false);
expect.assert_eq(&actual);
// the fixed-up tree should be syntactically valid
let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
assert_eq!(
parse.errors(),
&[],
"parse has syntax errors. parse tree:\n{:#?}",
parse.syntax_node()
);
reverse_fixups(&mut tt, &tmap, &fixups.undo_info);
// the fixed-up + reversed version should be equivalent to the original input
// (but token IDs don't matter)
let (original_as_tt, _) = mbe::syntax_node_to_token_tree(&parsed.syntax_node());
assert_eq!(tt.to_string(), original_as_tt.to_string());
}
// A trailing `.` without a field name gets a placeholder name appended.
#[test]
fn incomplete_field_expr_1() {
check(
r#"
fn foo() {
a.
}
"#,
expect![[r#"
fn foo () {a . __ra_fixup}
"#]],
)
}
// Same as above, but the statement already has its semicolon.
#[test]
fn incomplete_field_expr_2() {
check(
r#"
fn foo() {
a. ;
}
"#,
expect![[r#"
fn foo () {a . __ra_fixup ;}
"#]],
)
}
// The statement following the incomplete field access is left intact.
#[test]
fn incomplete_field_expr_3() {
check(
r#"
fn foo() {
a. ;
bar();
}
"#,
expect![[r#"
fn foo () {a . __ra_fixup ; bar () ;}
"#]],
)
}
// A complete field access that is missing its semicolon gets one appended.
#[test]
fn field_expr_before_call() {
// another case that easily happens while typing
check(
r#"
fn foo() {
a.b
bar();
}
"#,
expect![[r#"
fn foo () {a . b ; bar () ;}
"#]],
)
}
// An expression containing an error is replaced as a whole by the placeholder.
#[test]
fn extraneous_comma() {
check(
r#"
fn foo() {
bar(,);
}
"#,
expect![[r#"
fn foo () {__ra_fixup ;}
"#]],
)
}
}

View file

@ -15,6 +15,7 @@ use syntax::{
use crate::{
db::{self, AstDatabase},
fixup,
name::{AsName, Name},
HirFileId, HirFileIdRepr, InFile, MacroCallKind, MacroCallLoc, MacroDefKind, MacroFile,
};
@ -127,7 +128,7 @@ struct HygieneInfo {
attr_input_or_mac_def_start: Option<InFile<TextSize>>,
macro_def: Arc<TokenExpander>,
macro_arg: Arc<(tt::Subtree, mbe::TokenMap)>,
macro_arg: Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>,
macro_arg_shift: mbe::Shift,
exp_map: Arc<mbe::TokenMap>,
}

View file

@ -15,6 +15,7 @@ pub mod proc_macro;
pub mod quote;
pub mod eager;
pub mod mod_path;
mod fixup;
pub use mbe::{ExpandError, ExpandResult, Origin};
@ -426,7 +427,7 @@ pub struct ExpansionInfo {
attr_input_or_mac_def: Option<InFile<ast::TokenTree>>,
macro_def: Arc<TokenExpander>,
macro_arg: Arc<(tt::Subtree, mbe::TokenMap)>,
macro_arg: Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>,
/// A shift built from `macro_arg`'s subtree, relevant for attributes as the item is the macro arg
/// and as such we need to shift tokens if they are part of an attributes input instead of their item.
macro_arg_shift: mbe::Shift,