Split out syntax-bridge into a separate crate

Lukas Wirth 2024-08-05 10:43:01 +02:00
parent 670a5ab4a9
commit d2dd4f6d5f
30 changed files with 268 additions and 140 deletions
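
For orientation, here is a minimal sketch of what a caller of the split-out code might look like. The `syntax_bridge` crate name and import paths are assumptions taken from the commit title only; the call shape mirrors the test added below.

// Hypothetical downstream usage; the paths are assumed, the calls mirror this commit's code.
use syntax::{ast, AstNode};
use syntax_bridge::{
    dummy_test_span_utils::{DummyTestSpanMap, DUMMY},
    syntax_node_to_token_tree, DocCommentDesugarMode,
};

fn example_lowering(source: &str) {
    // Parse the source text, then lower the resulting syntax tree to a `tt` subtree.
    let file = ast::SourceFile::parse(source, span::Edition::CURRENT).ok().unwrap();
    let _subtree = syntax_node_to_token_tree(
        file.syntax(),
        DummyTestSpanMap,
        DUMMY,
        DocCommentDesugarMode::Mbe,
    );
}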

File diff suppressed because it is too large


@@ -0,0 +1,104 @@
use rustc_hash::FxHashMap;
use span::Span;
use syntax::{ast, AstNode};
use test_utils::extract_annotations;
use tt::{
buffer::{TokenBuffer, TokenTreeRef},
Leaf, Punct, Spacing,
};

use crate::{
    dummy_test_span_utils::{DummyTestSpanMap, DUMMY},
    syntax_node_to_token_tree, DocCommentDesugarMode,
};

/// Asserts that every `^ Alone` / `^ Joint` annotation in `fixture` matches the
/// spacing recorded on the corresponding punct token once the parse tree has
/// been lowered to a token tree.
fn check_punct_spacing(fixture: &str) {
let source_file = ast::SourceFile::parse(fixture, span::Edition::CURRENT).ok().unwrap();
let subtree = syntax_node_to_token_tree(
source_file.syntax(),
DummyTestSpanMap,
DUMMY,
DocCommentDesugarMode::Mbe,
);
let mut annotations: FxHashMap<_, _> = extract_annotations(fixture)
.into_iter()
.map(|(range, annotation)| {
let spacing = match annotation.as_str() {
"Alone" => Spacing::Alone,
"Joint" => Spacing::Joint,
a => panic!("unknown annotation: {a}"),
};
(range, spacing)
})
.collect();

    // Walk the lowered token tree; for every punct leaf whose span range carries
    // an annotation, compare the recorded spacing against the expected one.
    let buf = TokenBuffer::from_subtree(&subtree);
let mut cursor = buf.begin();
while !cursor.eof() {
while let Some(token_tree) = cursor.token_tree() {
if let TokenTreeRef::Leaf(
Leaf::Punct(Punct { spacing, span: Span { range, .. }, .. }),
_,
) = token_tree
{
if let Some(expected) = annotations.remove(range) {
assert_eq!(expected, *spacing);
}
}
cursor = cursor.bump_subtree();
}
cursor = cursor.bump();
}

    assert!(annotations.is_empty(), "unchecked annotations: {annotations:?}");
}

#[test]
fn punct_spacing() {
check_punct_spacing(
r#"
fn main() {
0+0;
//^ Alone
0+(0);
//^ Alone
0<=0;
//^ Joint
// ^ Alone
0<=(0);
// ^ Alone
a=0;
//^ Alone
a=(0);
//^ Alone
a+=0;
//^ Joint
// ^ Alone
a+=(0);
// ^ Alone
a&&b;
//^ Joint
// ^ Alone
a&&(b);
// ^ Alone
foo::bar;
// ^ Joint
// ^ Alone
use foo::{bar,baz,};
// ^ Alone
// ^ Alone
// ^ Alone
struct Struct<'a> {};
// ^ Joint
// ^ Joint
Struct::<0>;
// ^ Alone
Struct::<{0}>;
// ^ Alone
;;
//^ Joint
// ^ Alone
}
"#,
);
}


@@ -0,0 +1,119 @@
//! Convert macro-by-example tokens which are specific to macro expansion into a
//! format that works for our parser.

use std::fmt;

use span::Edition;
use syntax::{SyntaxKind, SyntaxKind::*, T};
use tt::buffer::TokenBuffer;

pub fn to_parser_input<S: Copy + fmt::Debug>(
edition: Edition,
buffer: &TokenBuffer<'_, S>,
) -> parser::Input {
let mut res = parser::Input::default();
let mut current = buffer.begin();
while !current.eof() {
let cursor = current;
let tt = cursor.token_tree();

        // Check if it is a lifetime (a `'` punct immediately followed by an ident).
if let Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(punct), _)) = tt {
if punct.char == '\'' {
let next = cursor.bump();
match next.token_tree() {
Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Ident(_ident), _)) => {
res.push(LIFETIME_IDENT);
current = next.bump();
continue;
}
                    _ => panic!("next token after `'` must be an ident: {:#?}", next.token_tree()),
}
}
}
current = match tt {
Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => {
match leaf {
tt::Leaf::Literal(lit) => {
let kind = match lit.kind {
tt::LitKind::Byte => SyntaxKind::BYTE,
tt::LitKind::Char => SyntaxKind::CHAR,
tt::LitKind::Integer => SyntaxKind::INT_NUMBER,
tt::LitKind::Float => SyntaxKind::FLOAT_NUMBER,
tt::LitKind::Str | tt::LitKind::StrRaw(_) => SyntaxKind::STRING,
tt::LitKind::ByteStr | tt::LitKind::ByteStrRaw(_) => {
SyntaxKind::BYTE_STRING
}
tt::LitKind::CStr | tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING,
tt::LitKind::Err(_) => SyntaxKind::ERROR,
};
res.push(kind);
if kind == FLOAT_NUMBER && !lit.symbol.as_str().ends_with('.') {
                            // Tag the token as joint if it is a float with a fractional part;
                            // we use this jointness to inform the parser which token-split
                            // event to emit when it encounters a float literal in a field access.
res.was_joint();
}
}
                    // Identifier classification is edition-dependent: `gen`, `dyn`, `async`,
                    // `await`, and `try` only became keywords in later editions, so older
                    // editions fall back to plain (or contextual) identifiers here.
                    tt::Leaf::Ident(ident) => match ident.sym.as_str() {
"_" => res.push(T![_]),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
_ if ident.is_raw.yes() => res.push(IDENT),
"gen" if !edition.at_least_2024() => res.push(IDENT),
"dyn" if !edition.at_least_2018() => res.push_ident(DYN_KW),
"async" | "await" | "try" if !edition.at_least_2018() => res.push(IDENT),
text => match SyntaxKind::from_keyword(text) {
Some(kind) => res.push(kind),
None => {
let contextual_keyword = SyntaxKind::from_contextual_keyword(text)
.unwrap_or(SyntaxKind::IDENT);
res.push_ident(contextual_keyword);
}
},
},
tt::Leaf::Punct(punct) => {
let kind = SyntaxKind::from_char(punct.char)
.unwrap_or_else(|| panic!("{punct:#?} is not a valid punct"));
res.push(kind);
if punct.spacing == tt::Spacing::Joint {
res.was_joint();
}
}
}
cursor.bump()
}
            // Entering a subtree: emit its opening delimiter (unless invisible) and descend.
            Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => {
if let Some(kind) = match subtree.delimiter.kind {
tt::DelimiterKind::Parenthesis => Some(T!['(']),
tt::DelimiterKind::Brace => Some(T!['{']),
tt::DelimiterKind::Bracket => Some(T!['[']),
tt::DelimiterKind::Invisible => None,
} {
res.push(kind);
}
cursor.subtree().unwrap()
}
            // No token tree at the cursor: we have reached the end of a subtree, so
            // emit its closing delimiter (unless invisible) and step out of it.
            None => match cursor.end() {
Some(subtree) => {
if let Some(kind) = match subtree.delimiter.kind {
tt::DelimiterKind::Parenthesis => Some(T![')']),
tt::DelimiterKind::Brace => Some(T!['}']),
tt::DelimiterKind::Bracket => Some(T![']']),
tt::DelimiterKind::Invisible => None,
} {
res.push(kind);
}
cursor.bump()
}
None => continue,
},
};
}
res
}
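
As a usage sketch under the same assumptions as above (a `syntax_bridge` crate re-exporting the items in this diff), converting a lowered token tree into parser input amounts to wrapping it in a `TokenBuffer` and calling the function defined here; only the module path and the helper name below are invented.

// Sketch only: the `syntax_bridge::to_parser_input` path is assumed; `TokenBuffer::from_subtree`
// and the `to_parser_input(edition, &buffer)` shape are taken from this commit's code.
use tt::buffer::TokenBuffer;

fn subtree_to_parser_input<S: Copy + std::fmt::Debug>(
    edition: span::Edition,
    subtree: &tt::Subtree<S>,
) -> parser::Input {
    // Wrap the token tree in a cursor-able buffer, then flatten it into the
    // token-kind stream that the parser crate consumes.
    let buffer = TokenBuffer::from_subtree(subtree);
    syntax_bridge::to_parser_input(edition, &buffer)
}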