proc_macro_api/legacy_protocol/msg/
flat.rs

1//! Serialization-friendly representation of `tt::TopSubtree`.
2//!
3//! It is possible to serialize `TopSubtree` recursively, as a tree, but using
4//! arbitrary-nested trees in JSON is problematic, as they can cause the JSON
5//! parser to overflow the stack.
6//!
7//! Additionally, such implementation would be pretty verbose, and we do care
8//! about performance here a bit.
9//!
10//! So what this module does is dumping a `tt::TopSubtree` into a bunch of flat
11//! array of numbers. See the test in the parent module to get an example
12//! output.
13//!
14//! ```json
15//!  {
16//!    // Array of subtrees, each subtree is represented by 4 numbers:
17//!    // id of delimiter, delimiter kind, index of first child in `token_tree`,
18//!    // index of last child in `token_tree`
19//!    "subtree":[4294967295,0,0,5,2,2,5,5],
20//!    // 2 ints per literal: [token id, index into `text`]
21//!    "literal":[4294967295,1],
22//!    // 3 ints per punct: [token id, char, spacing]
23//!    "punct":[4294967295,64,1],
24//!    // 2 ints per ident: [token id, index into `text`]
25//!    "ident":   [0,0,1,1],
26//!    // children of all subtrees, concatenated. Each child is represented as `index << 2 | tag`
27//!    // where tag denotes one of subtree, literal, punct or ident.
28//!    "token_tree":[3,7,1,4],
29//!    // Strings shared by idents and literals
30//!    "text": ["struct","Foo"]
31//!  }
32//! ```
33//!
34//! We probably should replace most of the code here with bincode someday, but,
35//! as we don't have bincode in Cargo.toml yet, lets stick with serde_json for
36//! the time being.
37
38use std::collections::VecDeque;
39
40use intern::Symbol;
41use rustc_hash::FxHashMap;
42use serde_derive::{Deserialize, Serialize};
43use span::{EditionedFileId, ErasedFileAstId, Span, SpanAnchor, SyntaxContext, TextRange};
44
45use crate::{
46    legacy_protocol::SpanId,
47    version::{ENCODE_CLOSE_SPAN_VERSION, EXTENDED_LEAF_DATA},
48};
49
50pub type SpanDataIndexMap =
51    indexmap::IndexSet<Span, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
52
53pub fn serialize_span_data_index_map(map: &SpanDataIndexMap) -> Vec<u32> {
54    map.iter()
55        .flat_map(|span| {
56            [
57                span.anchor.file_id.as_u32(),
58                span.anchor.ast_id.into_raw(),
59                span.range.start().into(),
60                span.range.end().into(),
61                span.ctx.into_u32(),
62            ]
63        })
64        .collect()
65}
66
67pub fn deserialize_span_data_index_map(map: &[u32]) -> SpanDataIndexMap {
68    debug_assert!(map.len().is_multiple_of(5));
69    map.chunks_exact(5)
70        .map(|span| {
71            let &[file_id, ast_id, start, end, e] = span else { unreachable!() };
72            Span {
73                anchor: SpanAnchor {
74                    file_id: EditionedFileId::from_raw(file_id),
75                    ast_id: ErasedFileAstId::from_raw(ast_id),
76                },
77                range: TextRange::new(start.into(), end.into()),
78                // SAFETY: We only receive spans from the server. If someone mess up the communication UB can happen,
79                // but that will be their problem.
80                ctx: unsafe { SyntaxContext::from_u32(e) },
81            }
82        })
83        .collect()
84}
85
86#[derive(Serialize, Deserialize, Debug)]
87pub struct FlatTree {
88    subtree: Vec<u32>,
89    literal: Vec<u32>,
90    punct: Vec<u32>,
91    ident: Vec<u32>,
92    token_tree: Vec<u32>,
93    text: Vec<String>,
94}
95
96struct SubtreeRepr {
97    open: SpanId,
98    close: SpanId,
99    kind: tt::DelimiterKind,
100    tt: [u32; 2],
101}
102
103struct LiteralRepr {
104    id: SpanId,
105    text: u32,
106    suffix: u32,
107    kind: u16,
108}
109
110struct PunctRepr {
111    id: SpanId,
112    char: char,
113    spacing: tt::Spacing,
114}
115
116struct IdentRepr {
117    id: SpanId,
118    text: u32,
119    is_raw: bool,
120}
121
122impl FlatTree {
123    pub fn new(
124        subtree: tt::SubtreeView<'_, Span>,
125        version: u32,
126        span_data_table: &mut SpanDataIndexMap,
127    ) -> FlatTree {
128        let mut w = Writer::<Span> {
129            string_table: FxHashMap::default(),
130            work: VecDeque::new(),
131            span_data_table,
132
133            subtree: Vec::new(),
134            literal: Vec::new(),
135            punct: Vec::new(),
136            ident: Vec::new(),
137            token_tree: Vec::new(),
138            text: Vec::new(),
139            version,
140        };
141        w.write(subtree);
142
143        FlatTree {
144            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
145                write_vec(w.subtree, SubtreeRepr::write_with_close_span)
146            } else {
147                write_vec(w.subtree, SubtreeRepr::write)
148            },
149            literal: if version >= EXTENDED_LEAF_DATA {
150                write_vec(w.literal, LiteralRepr::write_with_kind)
151            } else {
152                write_vec(w.literal, LiteralRepr::write)
153            },
154            punct: write_vec(w.punct, PunctRepr::write),
155            ident: if version >= EXTENDED_LEAF_DATA {
156                write_vec(w.ident, IdentRepr::write_with_rawness)
157            } else {
158                write_vec(w.ident, IdentRepr::write)
159            },
160            token_tree: w.token_tree,
161            text: w.text,
162        }
163    }
164
165    pub fn new_raw<T: SpanTransformer<Table = ()>>(
166        subtree: tt::SubtreeView<'_, T::Span>,
167        version: u32,
168    ) -> FlatTree {
169        let mut w = Writer::<T> {
170            string_table: FxHashMap::default(),
171            work: VecDeque::new(),
172            span_data_table: &mut (),
173
174            subtree: Vec::new(),
175            literal: Vec::new(),
176            punct: Vec::new(),
177            ident: Vec::new(),
178            token_tree: Vec::new(),
179            text: Vec::new(),
180            version,
181        };
182        w.write(subtree);
183
184        FlatTree {
185            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
186                write_vec(w.subtree, SubtreeRepr::write_with_close_span)
187            } else {
188                write_vec(w.subtree, SubtreeRepr::write)
189            },
190            literal: if version >= EXTENDED_LEAF_DATA {
191                write_vec(w.literal, LiteralRepr::write_with_kind)
192            } else {
193                write_vec(w.literal, LiteralRepr::write)
194            },
195            punct: write_vec(w.punct, PunctRepr::write),
196            ident: if version >= EXTENDED_LEAF_DATA {
197                write_vec(w.ident, IdentRepr::write_with_rawness)
198            } else {
199                write_vec(w.ident, IdentRepr::write)
200            },
201            token_tree: w.token_tree,
202            text: w.text,
203        }
204    }
205
206    pub fn to_subtree_resolved(
207        self,
208        version: u32,
209        span_data_table: &SpanDataIndexMap,
210    ) -> tt::TopSubtree<Span> {
211        Reader::<Span> {
212            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
213                read_vec(self.subtree, SubtreeRepr::read_with_close_span)
214            } else {
215                read_vec(self.subtree, SubtreeRepr::read)
216            },
217            literal: if version >= EXTENDED_LEAF_DATA {
218                read_vec(self.literal, LiteralRepr::read_with_kind)
219            } else {
220                read_vec(self.literal, LiteralRepr::read)
221            },
222            punct: read_vec(self.punct, PunctRepr::read),
223            ident: if version >= EXTENDED_LEAF_DATA {
224                read_vec(self.ident, IdentRepr::read_with_rawness)
225            } else {
226                read_vec(self.ident, IdentRepr::read)
227            },
228            token_tree: self.token_tree,
229            text: self.text,
230            span_data_table,
231            version,
232        }
233        .read()
234    }
235
236    pub fn to_subtree_unresolved<T: SpanTransformer<Table = ()>>(
237        self,
238        version: u32,
239    ) -> tt::TopSubtree<T::Span> {
240        Reader::<T> {
241            subtree: if version >= ENCODE_CLOSE_SPAN_VERSION {
242                read_vec(self.subtree, SubtreeRepr::read_with_close_span)
243            } else {
244                read_vec(self.subtree, SubtreeRepr::read)
245            },
246            literal: if version >= EXTENDED_LEAF_DATA {
247                read_vec(self.literal, LiteralRepr::read_with_kind)
248            } else {
249                read_vec(self.literal, LiteralRepr::read)
250            },
251            punct: read_vec(self.punct, PunctRepr::read),
252            ident: if version >= EXTENDED_LEAF_DATA {
253                read_vec(self.ident, IdentRepr::read_with_rawness)
254            } else {
255                read_vec(self.ident, IdentRepr::read)
256            },
257            token_tree: self.token_tree,
258            text: self.text,
259            span_data_table: &(),
260            version,
261        }
262        .read()
263    }
264}
265
266fn read_vec<T, F: Fn([u32; N]) -> T, const N: usize>(xs: Vec<u32>, f: F) -> Vec<T> {
267    let mut chunks = xs.chunks_exact(N);
268    let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect();
269    assert!(chunks.remainder().is_empty());
270    res
271}
272
273fn write_vec<T, F: Fn(T) -> [u32; N], const N: usize>(xs: Vec<T>, f: F) -> Vec<u32> {
274    xs.into_iter().flat_map(f).collect()
275}
276
277impl SubtreeRepr {
278    fn write(self) -> [u32; 4] {
279        let kind = match self.kind {
280            tt::DelimiterKind::Invisible => 0,
281            tt::DelimiterKind::Parenthesis => 1,
282            tt::DelimiterKind::Brace => 2,
283            tt::DelimiterKind::Bracket => 3,
284        };
285        [self.open.0, kind, self.tt[0], self.tt[1]]
286    }
287    fn read([open, kind, lo, len]: [u32; 4]) -> SubtreeRepr {
288        let kind = match kind {
289            0 => tt::DelimiterKind::Invisible,
290            1 => tt::DelimiterKind::Parenthesis,
291            2 => tt::DelimiterKind::Brace,
292            3 => tt::DelimiterKind::Bracket,
293            other => panic!("bad kind {other}"),
294        };
295        SubtreeRepr { open: SpanId(open), close: SpanId(!0), kind, tt: [lo, len] }
296    }
297    fn write_with_close_span(self) -> [u32; 5] {
298        let kind = match self.kind {
299            tt::DelimiterKind::Invisible => 0,
300            tt::DelimiterKind::Parenthesis => 1,
301            tt::DelimiterKind::Brace => 2,
302            tt::DelimiterKind::Bracket => 3,
303        };
304        [self.open.0, self.close.0, kind, self.tt[0], self.tt[1]]
305    }
306    fn read_with_close_span([open, close, kind, lo, len]: [u32; 5]) -> SubtreeRepr {
307        let kind = match kind {
308            0 => tt::DelimiterKind::Invisible,
309            1 => tt::DelimiterKind::Parenthesis,
310            2 => tt::DelimiterKind::Brace,
311            3 => tt::DelimiterKind::Bracket,
312            other => panic!("bad kind {other}"),
313        };
314        SubtreeRepr { open: SpanId(open), close: SpanId(close), kind, tt: [lo, len] }
315    }
316}
317
318impl LiteralRepr {
319    fn write(self) -> [u32; 2] {
320        [self.id.0, self.text]
321    }
322    fn read([id, text]: [u32; 2]) -> LiteralRepr {
323        LiteralRepr { id: SpanId(id), text, kind: 0, suffix: !0 }
324    }
325    fn write_with_kind(self) -> [u32; 4] {
326        [self.id.0, self.text, self.kind as u32, self.suffix]
327    }
328    fn read_with_kind([id, text, kind, suffix]: [u32; 4]) -> LiteralRepr {
329        LiteralRepr { id: SpanId(id), text, kind: kind as u16, suffix }
330    }
331}
332
333impl PunctRepr {
334    fn write(self) -> [u32; 3] {
335        let spacing = match self.spacing {
336            tt::Spacing::Alone | tt::Spacing::JointHidden => 0,
337            tt::Spacing::Joint => 1,
338        };
339        [self.id.0, self.char as u32, spacing]
340    }
341    fn read([id, char, spacing]: [u32; 3]) -> PunctRepr {
342        let spacing = match spacing {
343            0 => tt::Spacing::Alone,
344            1 => tt::Spacing::Joint,
345            other => panic!("bad spacing {other}"),
346        };
347        PunctRepr { id: SpanId(id), char: char.try_into().unwrap(), spacing }
348    }
349}
350
351impl IdentRepr {
352    fn write(self) -> [u32; 2] {
353        [self.id.0, self.text]
354    }
355    fn read(data: [u32; 2]) -> IdentRepr {
356        IdentRepr { id: SpanId(data[0]), text: data[1], is_raw: false }
357    }
358    fn write_with_rawness(self) -> [u32; 3] {
359        [self.id.0, self.text, self.is_raw as u32]
360    }
361    fn read_with_rawness([id, text, is_raw]: [u32; 3]) -> IdentRepr {
362        IdentRepr { id: SpanId(id), text, is_raw: is_raw == 1 }
363    }
364}
365
366pub trait SpanTransformer {
367    type Table;
368    type Span: Copy;
369    fn token_id_of(table: &mut Self::Table, s: Self::Span) -> SpanId;
370    fn span_for_token_id(table: &Self::Table, id: SpanId) -> Self::Span;
371}
372impl SpanTransformer for SpanId {
373    type Table = ();
374    type Span = Self;
375    fn token_id_of((): &mut Self::Table, token_id: Self::Span) -> SpanId {
376        token_id
377    }
378
379    fn span_for_token_id((): &Self::Table, id: SpanId) -> Self::Span {
380        id
381    }
382}
383impl SpanTransformer for Span {
384    type Table = SpanDataIndexMap;
385    type Span = Self;
386    fn token_id_of(table: &mut Self::Table, span: Self::Span) -> SpanId {
387        SpanId(table.insert_full(span).0 as u32)
388    }
389    fn span_for_token_id(table: &Self::Table, id: SpanId) -> Self::Span {
390        *table.get_index(id.0 as usize).unwrap_or_else(|| &table[0])
391    }
392}
393
394struct Writer<'a, 'span, S: SpanTransformer> {
395    work: VecDeque<(usize, tt::iter::TtIter<'a, S::Span>)>,
396    string_table: FxHashMap<std::borrow::Cow<'a, str>, u32>,
397    span_data_table: &'span mut S::Table,
398    version: u32,
399
400    subtree: Vec<SubtreeRepr>,
401    literal: Vec<LiteralRepr>,
402    punct: Vec<PunctRepr>,
403    ident: Vec<IdentRepr>,
404    token_tree: Vec<u32>,
405    text: Vec<String>,
406}
407
408impl<'a, T: SpanTransformer> Writer<'a, '_, T> {
409    fn write(&mut self, root: tt::SubtreeView<'a, T::Span>) {
410        let subtree = root.top_subtree();
411        self.enqueue(subtree, root.iter());
412        while let Some((idx, subtree)) = self.work.pop_front() {
413            self.subtree(idx, subtree);
414        }
415    }
416
417    fn token_id_of(&mut self, span: T::Span) -> SpanId {
418        T::token_id_of(self.span_data_table, span)
419    }
420
421    fn subtree(&mut self, idx: usize, subtree: tt::iter::TtIter<'a, T::Span>) {
422        let mut first_tt = self.token_tree.len();
423        let n_tt = subtree.clone().count(); // FIXME: `count()` walks over the entire iterator.
424        self.token_tree.resize(first_tt + n_tt, !0);
425
426        self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32];
427
428        for child in subtree {
429            let idx_tag = match child {
430                tt::iter::TtElement::Subtree(subtree, subtree_iter) => {
431                    let idx = self.enqueue(subtree, subtree_iter);
432                    idx << 2
433                }
434                tt::iter::TtElement::Leaf(leaf) => match leaf {
435                    tt::Leaf::Literal(lit) => {
436                        let idx = self.literal.len() as u32;
437                        let id = self.token_id_of(lit.span);
438                        let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA {
439                            (
440                                self.intern(lit.symbol.as_str()),
441                                lit.suffix.as_ref().map(|s| self.intern(s.as_str())).unwrap_or(!0),
442                            )
443                        } else {
444                            (self.intern_owned(format!("{lit}")), !0)
445                        };
446                        self.literal.push(LiteralRepr {
447                            id,
448                            text,
449                            kind: u16::from_le_bytes(match lit.kind {
450                                tt::LitKind::Err(_) => [0, 0],
451                                tt::LitKind::Byte => [1, 0],
452                                tt::LitKind::Char => [2, 0],
453                                tt::LitKind::Integer => [3, 0],
454                                tt::LitKind::Float => [4, 0],
455                                tt::LitKind::Str => [5, 0],
456                                tt::LitKind::StrRaw(r) => [6, r],
457                                tt::LitKind::ByteStr => [7, 0],
458                                tt::LitKind::ByteStrRaw(r) => [8, r],
459                                tt::LitKind::CStr => [9, 0],
460                                tt::LitKind::CStrRaw(r) => [10, r],
461                            }),
462                            suffix,
463                        });
464                        (idx << 2) | 0b01
465                    }
466                    tt::Leaf::Punct(punct) => {
467                        let idx = self.punct.len() as u32;
468                        let id = self.token_id_of(punct.span);
469                        self.punct.push(PunctRepr { char: punct.char, spacing: punct.spacing, id });
470                        (idx << 2) | 0b10
471                    }
472                    tt::Leaf::Ident(ident) => {
473                        let idx = self.ident.len() as u32;
474                        let id = self.token_id_of(ident.span);
475                        let text = if self.version >= EXTENDED_LEAF_DATA {
476                            self.intern(ident.sym.as_str())
477                        } else if ident.is_raw.yes() {
478                            self.intern_owned(format!("r#{}", ident.sym.as_str(),))
479                        } else {
480                            self.intern(ident.sym.as_str())
481                        };
482                        self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() });
483                        (idx << 2) | 0b11
484                    }
485                },
486            };
487            self.token_tree[first_tt] = idx_tag;
488            first_tt += 1;
489        }
490    }
491
492    fn enqueue(
493        &mut self,
494        subtree: &'a tt::Subtree<T::Span>,
495        contents: tt::iter::TtIter<'a, T::Span>,
496    ) -> u32 {
497        let idx = self.subtree.len();
498        let open = self.token_id_of(subtree.delimiter.open);
499        let close = self.token_id_of(subtree.delimiter.close);
500        let delimiter_kind = subtree.delimiter.kind;
501        self.subtree.push(SubtreeRepr { open, close, kind: delimiter_kind, tt: [!0, !0] });
502        self.work.push_back((idx, contents));
503        idx as u32
504    }
505
506    pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
507        let table = &mut self.text;
508        *self.string_table.entry(text.into()).or_insert_with(|| {
509            let idx = table.len();
510            table.push(text.to_owned());
511            idx as u32
512        })
513    }
514
515    pub(crate) fn intern_owned(&mut self, text: String) -> u32 {
516        let table = &mut self.text;
517        *self.string_table.entry(text.clone().into()).or_insert_with(|| {
518            let idx = table.len();
519            table.push(text);
520            idx as u32
521        })
522    }
523}
524
525struct Reader<'span, S: SpanTransformer> {
526    version: u32,
527    subtree: Vec<SubtreeRepr>,
528    literal: Vec<LiteralRepr>,
529    punct: Vec<PunctRepr>,
530    ident: Vec<IdentRepr>,
531    token_tree: Vec<u32>,
532    text: Vec<String>,
533    span_data_table: &'span S::Table,
534}
535
536impl<T: SpanTransformer> Reader<'_, T> {
537    pub(crate) fn read(self) -> tt::TopSubtree<T::Span> {
538        let mut res: Vec<Option<(tt::Delimiter<T::Span>, Vec<tt::TokenTree<T::Span>>)>> =
539            vec![None; self.subtree.len()];
540        let read_span = |id| T::span_for_token_id(self.span_data_table, id);
541        for i in (0..self.subtree.len()).rev() {
542            let repr = &self.subtree[i];
543            let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize];
544            let delimiter = tt::Delimiter {
545                open: read_span(repr.open),
546                close: read_span(repr.close),
547                kind: repr.kind,
548            };
549            let mut s = Vec::new();
550            for &idx_tag in token_trees {
551                let tag = idx_tag & 0b11;
552                let idx = (idx_tag >> 2) as usize;
553                match tag {
554                    // XXX: we iterate subtrees in reverse to guarantee
555                    // that this unwrap doesn't fire.
556                    0b00 => {
557                        let (delimiter, subtree) = res[idx].take().unwrap();
558                        s.push(tt::TokenTree::Subtree(tt::Subtree {
559                            delimiter,
560                            len: subtree.len() as u32,
561                        }));
562                        s.extend(subtree)
563                    }
564                    0b01 => {
565                        use tt::LitKind::*;
566                        let repr = &self.literal[idx];
567                        let text = self.text[repr.text as usize].as_str();
568                        let span = read_span(repr.id);
569                        s.push(
570                            tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA {
571                                tt::Literal {
572                                    symbol: Symbol::intern(text),
573                                    span,
574                                    kind: match u16::to_le_bytes(repr.kind) {
575                                        [0, _] => Err(()),
576                                        [1, _] => Byte,
577                                        [2, _] => Char,
578                                        [3, _] => Integer,
579                                        [4, _] => Float,
580                                        [5, _] => Str,
581                                        [6, r] => StrRaw(r),
582                                        [7, _] => ByteStr,
583                                        [8, r] => ByteStrRaw(r),
584                                        [9, _] => CStr,
585                                        [10, r] => CStrRaw(r),
586                                        _ => unreachable!(),
587                                    },
588                                    suffix: if repr.suffix != !0 {
589                                        Some(Symbol::intern(
590                                            self.text[repr.suffix as usize].as_str(),
591                                        ))
592                                    } else {
593                                        None
594                                    },
595                                }
596                            } else {
597                                tt::token_to_literal(text, span)
598                            })
599                            .into(),
600                        )
601                    }
602                    0b10 => {
603                        let repr = &self.punct[idx];
604                        s.push(
605                            tt::Leaf::Punct(tt::Punct {
606                                char: repr.char,
607                                spacing: repr.spacing,
608                                span: read_span(repr.id),
609                            })
610                            .into(),
611                        )
612                    }
613                    0b11 => {
614                        let repr = &self.ident[idx];
615                        let text = self.text[repr.text as usize].as_str();
616                        let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA {
617                            (
618                                if repr.is_raw { tt::IdentIsRaw::Yes } else { tt::IdentIsRaw::No },
619                                text,
620                            )
621                        } else {
622                            tt::IdentIsRaw::split_from_symbol(text)
623                        };
624                        s.push(
625                            tt::Leaf::Ident(tt::Ident {
626                                sym: Symbol::intern(text),
627                                span: read_span(repr.id),
628                                is_raw,
629                            })
630                            .into(),
631                        )
632                    }
633                    other => panic!("bad tag: {other}"),
634                }
635            }
636            res[i] = Some((delimiter, s));
637        }
638
639        let (delimiter, mut res) = res[0].take().unwrap();
640        res.insert(0, tt::TokenTree::Subtree(tt::Subtree { delimiter, len: res.len() as u32 }));
641        tt::TopSubtree(res.into_boxed_slice())
642    }
643}