Merge #11444

11444: feat: Fix up syntax errors in attribute macro inputs to make completion work more often r=flodiebold a=flodiebold This implements the "fix up syntax nodes" workaround mentioned in #11014. It isn't much more than a proof of concept; I have only implemented a few cases, but it already helps quite a bit. Some notes: - I'm not super happy about how much the fixup procedure needs to interact with the syntax node -> token tree conversion code (e.g. needing to share the token map). This could maybe be simplified with some refactoring of that code. - It would maybe be nice to have the fixup procedure reuse or share information with the parser, though I'm not really sure how much that would actually help. Co-authored-by: Florian Diebold <flodiebold@gmail.com>
2025-10-01 22:31:43 +00:00 · 2022-02-12 12:48:46 +00:00 · 2022-02-12 12:48:46 +00:00 · 7a17fb9c43
commit 7a17fb9c43
parent 4449a336f6 ccb789b94a
14 changed files with 646 additions and 95 deletions
--- a/crates/hir_expand/src/db.rs
+++ b/crates/hir_expand/src/db.rs
@ -14,7 +14,7 @@ use syntax::{
 };

 use crate::{
-    ast_id_map::AstIdMap, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
+    ast_id_map::AstIdMap, fixup, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
    BuiltinFnLikeExpander, ExpandTo, HirFileId, HirFileIdRepr, MacroCallId, MacroCallKind,
    MacroCallLoc, MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
 };
@ -108,7 +108,10 @@ pub trait AstDatabase: SourceDatabase {

    /// Lowers syntactic macro call to a token tree representation.
    #[salsa::transparent]
-    fn macro_arg(&self, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>>;
+    fn macro_arg(
+        &self,
+        id: MacroCallId,
+    ) -> Option<Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>>;
    /// Extracts syntax node, corresponding to a macro call. That's a firewall
    /// query, only typing in the macro call itself changes the returned
    /// subtree.
@ -146,8 +149,15 @@ pub fn expand_speculative(

    // Build the subtree and token mapping for the speculative args
    let censor = censor_for_macro_input(&loc, &speculative_args);
-    let (mut tt, spec_args_tmap) =
-        mbe::syntax_node_to_token_tree_censored(&speculative_args, &censor);
+    let mut fixups = fixup::fixup_syntax(&speculative_args);
+    fixups.replace.extend(censor.into_iter().map(|node| (node, Vec::new())));
+    let (mut tt, spec_args_tmap, _) = mbe::syntax_node_to_token_tree_with_modifications(
+        &speculative_args,
+        fixups.token_map,
+        fixups.next_id,
+        fixups.replace,
+        fixups.append,
+    );

    let (attr_arg, token_id) = match loc.kind {
        MacroCallKind::Attr { invoc_attr_index, .. } => {
@ -194,7 +204,7 @@ pub fn expand_speculative(

    // Do the actual expansion, we need to directly expand the proc macro due to the attribute args
    // Otherwise the expand query will fetch the non speculative attribute args and pass those instead.
-    let speculative_expansion = if let MacroDefKind::ProcMacro(expander, ..) = loc.def.kind {
+    let mut speculative_expansion = if let MacroDefKind::ProcMacro(expander, ..) = loc.def.kind {
        tt.delimiter = None;
        expander.expand(db, loc.krate, &tt, attr_arg.as_ref())
    } else {
@ -202,6 +212,7 @@ pub fn expand_speculative(
    };

    let expand_to = macro_expand_to(db, actual_macro_call);
+    fixup::reverse_fixups(&mut speculative_expansion.value, &spec_args_tmap, &fixups.undo_info);
    let (node, rev_tmap) = token_tree_to_syntax_node(&speculative_expansion.value, expand_to);

    let range = rev_tmap.first_range_by_token(token_id, token_to_map.kind())?;
@ -289,20 +300,31 @@ fn parse_macro_expansion(
    }
 }

-fn macro_arg(db: &dyn AstDatabase, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>> {
+fn macro_arg(
+    db: &dyn AstDatabase,
+    id: MacroCallId,
+) -> Option<Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>> {
    let arg = db.macro_arg_text(id)?;
    let loc = db.lookup_intern_macro_call(id);

    let node = SyntaxNode::new_root(arg);
    let censor = censor_for_macro_input(&loc, &node);
-    let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(&node, &censor);
+    let mut fixups = fixup::fixup_syntax(&node);
+    fixups.replace.extend(censor.into_iter().map(|node| (node, Vec::new())));
+    let (mut tt, tmap, _) = mbe::syntax_node_to_token_tree_with_modifications(
+        &node,
+        fixups.token_map,
+        fixups.next_id,
+        fixups.replace,
+        fixups.append,
+    );

    if loc.def.is_proc_macro() {
        // proc macros expect their inputs without parentheses, MBEs expect it with them included
        tt.delimiter = None;
    }

-    Some(Arc::new((tt, tmap)))
+    Some(Arc::new((tt, tmap, fixups.undo_info)))
 }

 fn censor_for_macro_input(loc: &MacroCallLoc, node: &SyntaxNode) -> FxHashSet<SyntaxNode> {
@ -419,10 +441,9 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
        // be reported at the definition site (when we construct a def map).
        Err(err) => return ExpandResult::str_err(format!("invalid macro definition: {}", err)),
    };
-    let ExpandResult { value: tt, err } = expander.expand(db, id, &macro_arg.0);
+    let ExpandResult { value: mut tt, err } = expander.expand(db, id, &macro_arg.0);
    // Set a hard limit for the expanded tt
    let count = tt.count();
-    // XXX: Make ExpandResult a real error and use .map_err instead?
    if TOKEN_LIMIT.check(count).is_err() {
        return ExpandResult::str_err(format!(
            "macro invocation exceeds token limit: produced {} tokens, limit is {}",
@ -431,6 +452,8 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
        ));
    }

+    fixup::reverse_fixups(&mut tt, &macro_arg.1, &macro_arg.2);
+
    ExpandResult { value: Some(Arc::new(tt)), err }
 }

--- a/crates/hir_expand/src/fixup.rs
+++ b/crates/hir_expand/src/fixup.rs
@ -0,0 +1,261 @@
+//! To make attribute macros work reliably when typing, we need to take care to
+//! fix up syntax errors in the code we're passing to them.
+use std::mem;
+
+use mbe::{SyntheticToken, SyntheticTokenId, TokenMap};
+use rustc_hash::FxHashMap;
+use syntax::{
+    ast::{self, AstNode},
+    match_ast, SyntaxKind, SyntaxNode, TextRange,
+};
+use tt::Subtree;
+
+/// The result of calculating fixes for a syntax node: synthetic tokens to `append` to nodes or
+/// to `replace` nodes with, the `undo_info` needed to reverse those changes afterwards, and the
+/// `token_map` / `next_id` state handed on to the syntax node -> token tree conversion.
+#[derive(Debug)]
+pub(crate) struct SyntaxFixups {
+    pub(crate) append: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+    pub(crate) replace: FxHashMap<SyntaxNode, Vec<SyntheticToken>>,
+    pub(crate) undo_info: SyntaxFixupUndoInfo,
+    pub(crate) token_map: TokenMap,
+    pub(crate) next_id: u32,
+}
+
+/// Undo information for fixups: the original token tree of each replaced node, by index.
+#[derive(Debug, PartialEq, Eq)]
+pub struct SyntaxFixupUndoInfo {
+    original: Vec<Subtree>,
+}
+
+const EMPTY_ID: SyntheticTokenId = SyntheticTokenId(!0); // appended tokens: nothing to restore
+
+/// Walks `node` in preorder and collects the modifications needed to make it convert to a
+/// syntactically valid token tree.
+///
+/// Expression nodes with an error that cannot be handled further down are replaced wholesale
+/// by a single `__ra_fixup` identifier; the token tree of the original node is saved in the undo
+/// info at the index carried by the replacement's synthetic id. A field access without a name
+/// gets a `__ra_fixup` identifier appended, and an expression statement without a semicolon
+/// gets a `;` appended; both appended tokens use `EMPTY_ID`.
+pub(crate) fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
+    let mut fixups = SyntaxFixups {
+        append: FxHashMap::default(),
+        replace: FxHashMap::default(),
+        undo_info: SyntaxFixupUndoInfo { original: Vec::new() },
+        token_map: TokenMap::default(),
+        next_id: 0,
+    };
+
+    // This has to stay a `while let`: the walker is advanced manually via `skip_subtree`.
+    let mut walker = node.preorder();
+    while let Some(event) = walker.next() {
+        let current = match event {
+            syntax::WalkEvent::Leave(_) => continue,
+            syntax::WalkEvent::Enter(entered) => entered,
+        };
+
+        if can_handle_error(&current) && has_error_to_handle(&current) {
+            // The node contains an error node, so it is swapped out completely for something
+            // valid. Its unmodified token tree is converted first so it can be restored later.
+            let (saved_tree, updated_map, updated_next_id) =
+                mbe::syntax_node_to_token_tree_with_modifications(
+                    &current,
+                    mem::take(&mut fixups.token_map),
+                    fixups.next_id,
+                    Default::default(),
+                    Default::default(),
+                );
+            fixups.token_map = updated_map;
+            fixups.next_id = updated_next_id;
+
+            let undo_index = fixups.undo_info.original.len() as u32;
+            fixups.undo_info.original.push(saved_tree);
+
+            let placeholder = SyntheticToken {
+                kind: SyntaxKind::IDENT,
+                text: "__ra_fixup".into(),
+                range: current.text_range(),
+                id: SyntheticTokenId(undo_index),
+            };
+            fixups.replace.insert(current.clone(), vec![placeholder]);
+
+            // Everything below this node is covered by the replacement.
+            walker.skip_subtree();
+            continue;
+        }
+
+        // Other situations can be repaired by appending tokens at the end of the node.
+        let at_end = TextRange::empty(current.text_range().end());
+        match_ast! {
+            match current {
+                ast::FieldExpr(field_expr) => {
+                    // incomplete field access: some_expr.|
+                    if field_expr.name_ref().is_none() {
+                        let missing_name = SyntheticToken {
+                            kind: SyntaxKind::IDENT,
+                            text: "__ra_fixup".into(),
+                            range: at_end,
+                            id: EMPTY_ID,
+                        };
+                        fixups.append.insert(current.clone(), vec![missing_name]);
+                    }
+                },
+                ast::ExprStmt(expr_stmt) => {
+                    // expression statement that lacks its terminating semicolon
+                    if expr_stmt.semicolon_token().is_none() {
+                        let missing_semicolon = SyntheticToken {
+                            kind: SyntaxKind::SEMICOLON,
+                            text: ";".into(),
+                            range: at_end,
+                            id: EMPTY_ID,
+                        };
+                        fixups.append.insert(current.clone(), vec![missing_semicolon]);
+                    }
+                },
+                _ => (),
+            }
+        }
+    }
+
+    fixups
+}
+
+fn has_error(node: &SyntaxNode) -> bool {
+    node.children().any(|c| c.kind() == SyntaxKind::ERROR)
+}
+
+/// Returns `true` if the kind of `node` can be cast to `ast::Expr`; such nodes are the ones
+/// `fixup_syntax` replaces when they contain errors.
+fn can_handle_error(node: &SyntaxNode) -> bool {
+    let kind = node.kind();
+    ast::Expr::can_cast(kind)
+}
+
+fn has_error_to_handle(node: &SyntaxNode) -> bool {
+    has_error(node) || node.children().any(|c| !can_handle_error(&c) && has_error_to_handle(&c))
+}
+
+/// Undoes, in place, the modifications that `fixup_syntax` introduced into a token tree.
+///
+/// At every nesting level, leaves whose synthetic id is `EMPTY_ID` (tokens that were merely
+/// appended and have no original counterpart) are dropped, and every other leaf carrying a
+/// synthetic id is replaced by the original subtree stored at that index in `undo_info`.
+/// Leaves without a synthetic id are kept as they are; nested subtrees are processed
+/// recursively.
+///
+/// `token_map` is the map that was produced together with the fixed-up input, and `undo_info`
+/// must come from the same `fixup_syntax` call, since synthetic ids index into it.
+pub(crate) fn reverse_fixups(
+    tt: &mut Subtree,
+    token_map: &TokenMap,
+    undo_info: &SyntaxFixupUndoInfo,
+) {
+    // One lookup per leaf is enough: `None` (not synthetic) never equals `Some(EMPTY_ID)`.
+    tt.token_trees.retain(|child| match child {
+        tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()) != Some(EMPTY_ID),
+        tt::TokenTree::Subtree(_) => true,
+    });
+    for child in &mut tt.token_trees {
+        match child {
+            tt::TokenTree::Subtree(subtree) => reverse_fixups(subtree, token_map, undo_info),
+            tt::TokenTree::Leaf(leaf) => {
+                if let Some(id) = token_map.synthetic_token_id(leaf.id()) {
+                    // `EMPTY_ID` leaves were removed above, so `id` indexes `original`.
+                    let original = &undo_info.original[id.0 as usize];
+                    *child = tt::TokenTree::Subtree(original.clone());
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use expect_test::{expect, Expect};
+
+    use super::reverse_fixups;
+
+    /// Parses `ra_fixture`, applies the fixups and checks three things: the fixed-up token tree
+    /// prints as `expect`, it parses without syntax errors, and reversing the fixups yields the
+    /// same text as converting the original input directly.
+    #[track_caller]
+    fn check(ra_fixture: &str, mut expect: Expect) {
+        let source_file = syntax::SourceFile::parse(ra_fixture);
+        let root = source_file.syntax_node();
+        let fixups = super::fixup_syntax(&root);
+        let (mut tt, tmap, _) = mbe::syntax_node_to_token_tree_with_modifications(
+            &root,
+            fixups.token_map,
+            fixups.next_id,
+            fixups.replace,
+            fixups.append,
+        );
+
+        let actual = format!("{}\n", tt);
+
+        expect.indent(false);
+        expect.assert_eq(&actual);
+
+        // the fixed-up tree should be syntactically valid
+        let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
+        assert_eq!(
+            parse.errors(),
+            &[],
+            "parse has syntax errors. parse tree:\n{:#?}",
+            parse.syntax_node()
+        );
+
+        reverse_fixups(&mut tt, &tmap, &fixups.undo_info);
+
+        // the fixed-up + reversed version should be equivalent to the original input
+        // (but token IDs don't matter)
+        let (original_as_tt, _) = mbe::syntax_node_to_token_tree(&root);
+        assert_eq!(tt.to_string(), original_as_tt.to_string());
+    }
+
+    // Field access with nothing after the dot, at the end of the block.
+    #[test]
+    fn incomplete_field_expr_1() {
+        check(
+            r#"
+fn foo() {
+    a.
+}
+"#,
+            expect![[r#"
+fn foo () {a . __ra_fixup}
+"#]],
+        )
+    }
+
+    // Field access with nothing after the dot, followed by a semicolon.
+    #[test]
+    fn incomplete_field_expr_2() {
+        check(
+            r#"
+fn foo() {
+    a. ;
+}
+"#,
+            expect![[r#"
+fn foo () {a . __ra_fixup ;}
+"#]],
+        )
+    }
+
+    // Same as above, with another statement following.
+    #[test]
+    fn incomplete_field_expr_3() {
+        check(
+            r#"
+fn foo() {
+    a. ;
+    bar();
+}
+"#,
+            expect![[r#"
+fn foo () {a . __ra_fixup ; bar () ;}
+"#]],
+        )
+    }
+
+    #[test]
+    fn field_expr_before_call() {
+        // another case that easily happens while typing
+        check(
+            r#"
+fn foo() {
+    a.b
+    bar();
+}
+"#,
+            expect![[r#"
+fn foo () {a . b ; bar () ;}
+"#]],
+        )
+    }
+
+    // A call with a stray comma is replaced as a whole by the placeholder identifier.
+    #[test]
+    fn extraneous_comma() {
+        check(
+            r#"
+fn foo() {
+    bar(,);
+}
+"#,
+            expect![[r#"
+fn foo () {__ra_fixup ;}
+"#]],
+        )
+    }
+}
--- a/crates/hir_expand/src/hygiene.rs
+++ b/crates/hir_expand/src/hygiene.rs
@ -15,6 +15,7 @@ use syntax::{

 use crate::{
    db::{self, AstDatabase},
+    fixup,
    name::{AsName, Name},
    HirFileId, HirFileIdRepr, InFile, MacroCallKind, MacroCallLoc, MacroDefKind, MacroFile,
 };
@ -127,7 +128,7 @@ struct HygieneInfo {
    attr_input_or_mac_def_start: Option<InFile<TextSize>>,

    macro_def: Arc<TokenExpander>,
-    macro_arg: Arc<(tt::Subtree, mbe::TokenMap)>,
+    macro_arg: Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>,
    macro_arg_shift: mbe::Shift,
    exp_map: Arc<mbe::TokenMap>,
 }
--- a/crates/hir_expand/src/lib.rs
+++ b/crates/hir_expand/src/lib.rs
@ -15,6 +15,7 @@ pub mod proc_macro;
 pub mod quote;
 pub mod eager;
 pub mod mod_path;
+mod fixup;

 pub use mbe::{ExpandError, ExpandResult, Origin};

@ -426,7 +427,7 @@ pub struct ExpansionInfo {
    attr_input_or_mac_def: Option<InFile<ast::TokenTree>>,

    macro_def: Arc<TokenExpander>,
-    macro_arg: Arc<(tt::Subtree, mbe::TokenMap)>,
+    macro_arg: Arc<(tt::Subtree, mbe::TokenMap, fixup::SyntaxFixupUndoInfo)>,
    /// A shift built from `macro_arg`'s subtree, relevant for attributes as the item is the macro arg
    /// and as such we need to shift tokens if they are part of an attributes input instead of their item.
    macro_arg_shift: mbe::Shift,